def main():
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    unl = load_unlabeled_training(flatten=True)
    unl = standardize(unl)
    test = load_public_test(flatten=True)
    test = standardize(test)
    shuffle_in_unison(images, labels)
    #d = DictionaryLearning().fit(images)
    d = MiniBatchDictionaryLearning(n_components=500, n_iter=500, verbose=True).fit(images)
    s = SparseCoder(d.components_)
    proj_test = s.transform(images)
    pt = s.transform(test)
    #kpca = KernelPCA(kernel="rbf")
    #kpca.fit(unl)
    #test_proj = kpca.transform(images)
    #pt = kpca.transform(test)
    #spca = SparsePCA().fit(unl)
    #test_proj = spca.transform(images)
    #pt = spca.transform(test)
    svc = SVC()
    scores = cross_validation.cross_val_score(svc, proj_test, labels, cv=10)
    print(scores)
    print(np.mean(scores))
    print(np.var(scores))
    svc.fit(proj_test, labels)
    pred = svc.predict(pt)
    write_results(pred, '../svm_res.csv')
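The loaders above (load_labeled_training, standardize, write_results) are project-specific. A self-contained sketch of the same dictionary-learning-plus-SVM pipeline on synthetic data, assuming a current scikit-learn (sklearn.model_selection replaces the old cross_validation module; sizes are made up):

import numpy as np
from sklearn.decomposition import MiniBatchDictionaryLearning, SparseCoder
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.rand(200, 64)                    # stand-in for flattened images
y = rng.randint(0, 10, size=200)         # stand-in labels

d = MiniBatchDictionaryLearning(n_components=50, random_state=0).fit(X)
codes = SparseCoder(d.components_).transform(X)
svc = SVC()
print(cross_val_score(svc, codes, y, cv=3).mean())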
Example 2
    def discriminative_training(self,concatenated_activations,concatenated_bases, verbose = 100):


        # Making copies of concatenated bases and activation. 
        optimal_a = np.copy(concatenated_activations)
        predicted_b = np.copy(concatenated_bases)
        
        '''
        Next, modify the bases so that sparse coding against the modified
        bases b_hat yields activations close to the optimal a_opt.
        '''

        alpha = self.learning_rate
        least_error = 1e10
        total_power = self.total_power
        v_size = .20
        v_index = int(total_power.shape[1] * v_size)
        train_power = total_power[:,:-v_index]
        v_power = total_power[:,-v_index:]
        train_optimal_a = optimal_a[:,:-v_index]
        v_optimal_a = optimal_a[:,-v_index:]

        print ("If Iteration wise errors are not decreasing, then please decrease the learning rate")
        for i in range(self.iterations):

            a = time.time()
            # Finding activations for the given bases
            model = SparseCoder(dictionary=predicted_b.T,positive_code=True,transform_algorithm='lasso_lars',transform_alpha=self.sparsity_coef)
            train_predicted_a = model.transform(train_power.T).T
            model = SparseCoder(dictionary=predicted_b.T,positive_code=True,transform_algorithm='lasso_lars',transform_alpha=self.sparsity_coef)
            val_predicted_a = model.transform(v_power.T).T        
            err = np.mean(np.abs(val_predicted_a - v_optimal_a))

            if err<least_error:
                #print ("Chose the best")
                least_error = err
                best_b = np.copy(predicted_b)
                
            # Modify the bases b_hat so that they yield activations closer to a_opt
            T1 = (train_power - predicted_b@train_predicted_a)@train_predicted_a.T
            T2 = (train_power - predicted_b@train_optimal_a)@train_optimal_a.T
            predicted_b = predicted_b - alpha *( T1 - T2)
            predicted_b = np.where(predicted_b>0,predicted_b,0)
            # Making sure that columns sum to 1
            predicted_b = (predicted_b.T/np.linalg.norm(predicted_b.T,axis=1).reshape((-1,1))).T 
            if i % verbose == 0:
                print("Iteration", i, "Error", err)

        return  best_b
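A minimal standalone sketch of the base-update step above on random arrays (all shapes and the learning rate are hypothetical; the nonnegativity and column-normalization steps mirror the method):

import numpy as np

rng = np.random.RandomState(0)
d, r, n = 16, 8, 40                    # hypothetical sizes
power = np.abs(rng.randn(d, n))        # data (d features x n samples)
a_opt = np.abs(rng.randn(r, n))        # target activations
a_pred = np.abs(rng.randn(r, n))       # activations from current bases
b_hat = np.abs(rng.randn(d, r))        # current bases
alpha = 1e-3

# gradient-style step pulling the predicted activations toward a_opt
T1 = (power - b_hat @ a_pred) @ a_pred.T
T2 = (power - b_hat @ a_opt) @ a_opt.T
b_hat -= alpha * (T1 - T2)
b_hat = np.maximum(b_hat, 0)                            # keep nonnegative
b_hat /= np.linalg.norm(b_hat, axis=0, keepdims=True)   # unit-norm columns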
Example 3
def compute_residual(query, test):
    """Compute the residual of the sparse coding representation (RSCR) 

    Args:
        query (torch.Tensor): Query LR
        test (torch.Tensor): Test LR

    Returns:
        float: RSCR
    """

    D = test.squeeze().detach().numpy()
    Y = query.detach().numpy()

    X = query

    if Y.ndim < 2:
        Y = Y.reshape(1, -1)
    if D.ndim < 2:
        D = D.reshape(1, -1)
    coder = SparseCoder(dictionary=D, transform_algorithm='lasso_lars')
    A = coder.transform(Y)
    A = torch.FloatTensor(A)
    D = torch.FloatTensor(D)
    RSCR = torch.norm(X - torch.mm(A, D)).item()

    return RSCR
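A quick usage sketch for compute_residual with random tensors (shapes are made up; the rows of `test` act as the dictionary atoms):

import torch

query = torch.randn(64)       # hypothetical query LR feature vector
test = torch.randn(10, 64)    # hypothetical set of 10 test vectors
print(compute_residual(query, test))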
Example 4
        def RunSparseCodingScikit(q):
            totalTimer = Timer()

            # Load input dataset.
            inputData = np.genfromtxt(self.dataset[0], delimiter=',')
            dictionary = np.genfromtxt(self.dataset[1], delimiter=',')

            # Get all the parameters.
            l = re.search(r"-l (\d+)", options)
            l = 0 if not l else int(l.group(1))

            try:
                with totalTimer:
                    # Perform Sparse Coding.
                    model = SparseCoder(dictionary=dictionary,
                                        transform_algorithm='lars',
                                        transform_alpha=l)
                    code = model.transform(inputData)
            except Exception as e:
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
Example 5
    def joint_sparse_code_tensor(self, X, W):
        '''
        Given data matrix X and dictionary matrix W, find
        code matrix H such that W*H approximates X

        args:
            X (numpy array): data matrix with dimensions: features (d) x samples (n)
            W (numpy array): dictionary matrix with dimensions: features (d) x topics (r)

        returns:
            H (numpy array): code matrix with dimensions: topics (r) x samples(n)
        '''

        if DEBUG:
            print('sparse_code')
            print('X.shape:', X.shape)
            print('W.shape:', W.shape, '\n')

        # initialize the SparseCoder with W as its dictionary
        # then find H such that X \approx W*H
        if self.alpha is None:
            coder = SparseCoder(dictionary=W.T, transform_n_nonzero_coefs=None,
                                transform_alpha=2, transform_algorithm='lasso_lars', positive_code=True)
        else:
            coder = SparseCoder(dictionary=W.T, transform_n_nonzero_coefs=None,
                                transform_alpha=self.alpha, transform_algorithm='lasso_lars', positive_code=True)
        # alpha = L1 regularization parameter.
        H = coder.transform(X.T)

        # transpose H before returning to undo the preceding transpose on X
        return H.T
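A tiny demonstration of the X ≈ W·H factorization this method performs, as a sketch on random nonnegative data (sizes and the alpha value are arbitrary):

import numpy as np
from sklearn.decomposition import SparseCoder

rng = np.random.RandomState(0)
d, r, n = 20, 5, 30
W = np.abs(rng.randn(d, r))
H_true = np.abs(rng.randn(r, n)) * (rng.rand(r, n) < 0.3)   # sparse codes
X = W @ H_true

coder = SparseCoder(dictionary=W.T, transform_alpha=0.1,
                    transform_algorithm='lasso_lars', positive_code=True)
H = coder.transform(X.T).T
print(np.linalg.norm(X - W @ H) / np.linalg.norm(X))        # small residual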
Example 6
def test_sparse_coder():
    db = coco.COCO('/media/zawlin/ssd/coco/annotations/captions_train2014.json')
    ilsvrc_word2vec = zl.load('ilsvrc_word2vec')
    test_word2vec = zl.load('test_word2vec')
    D = []
    idx2word = []
    for key in ilsvrc_word2vec:
        idx2word.append(key)
        D.append(ilsvrc_word2vec[key])
    idx2word = np.array(idx2word)
    D = np.array(D)
    print('loading word2vec')
    model = gensim.models.KeyedVectors.load_word2vec_format('./GoogleNews-vectors-negative300.bin', binary=True)
    coder = SparseCoder(dictionary=D, transform_n_nonzero_coefs=5,
                            transform_alpha=None, transform_algorithm='lasso_cd')
    #random.shuffle(db.anns)
    for k in db.anns:
        cap = db.anns[k]['caption']
        splited = cap.split(' ')
        for s in splited:
            if s.lower() in model:
                y = model[s.lower()]
                x  = coder.transform(y.reshape(1,-1))[0]
                print('%s = %s' % (s.lower(), idx2word[np.argsort(x)[::-1]]))
                print(x[np.argsort(x)[::-1]])
        c = input('press q to quit')
        if c == 'q':
            exit(0)
Example 7
    def sparse_code(self, X, W):
        '''
        Given data matrix X and dictionary matrix W, find
        code matrix H such that W*H approximates X

        args:
            X (numpy array): data matrix with dimensions: data_dim (d) x samples (n)
            W (numpy array): dictionary matrix with dimensions: data_dim (d) x topics (r)

        returns:
            H (numpy array): code matrix with dimensions: topics (r) x samples(n)
        '''

        if DEBUG:
            print('sparse_code')
            print('X.shape:', X.shape)
            print('W.shape:', W.shape, '\n')

        # extract matrix dimensions from X, W
        # and initialize H with appropriate dimensions
        d, n = np.shape(X)
        d, r = np.shape(W)
        H = np.zeros([n, r])

        # initialize the SparseCoder with W as its dictionary
        # then find H such that X \approx W*H
        coder = SparseCoder(dictionary=W.T,
                            transform_n_nonzero_coefs=None,
                            transform_alpha=2,
                            transform_algorithm='lasso_lars',
                            positive_code=False)
        H = coder.transform(X.T)
        # transpose H before returning to undo the preceding transpose on X
        return H.T
Example 8
    def RunSparseCodingScikit():
      totalTimer = Timer()

      # Load input dataset.
      inputData = np.genfromtxt(self.dataset[0], delimiter=',')
      dictionary = np.genfromtxt(self.dataset[1], delimiter=',')

      # Get all the parameters.
      opts = {}
      if "lambda" in options:
        opts["transform_alpha"] = options.pop("lambda")
      if "max_iterations" in options:
        opts["max_iter"] = options.pop("max_iterations")
      opts["transform_algorithm"] = "lars"
      opts["dictionary"] = dictionary

      if len(options) > 0:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

      try:
        with totalTimer:
          # Perform Sparse Coding.
          model = SparseCoder(**opts)
          code = model.transform(inputData)
      except Exception as e:
        return -1

      return totalTimer.ElapsedTime()
Example 9
    def predict(self, X_test):
        n_samples = X_test.shape[1]
        X = X_test

        D = self.D
        W = self.W

        D = np.nan_to_num(D)
        D1 = np.sum(np.abs(D)**2, axis=0)**(1. / 2)
        for i in range(D.shape[1]):
            D[:, i] = D[:, i] / D1[i]
        #print(D,X)

        print("predicting")
        coder = SparseCoder(dictionary=np.transpose(D),
                            transform_n_nonzero_coefs=self.n_nonzero,
                            transform_algorithm='omp')
        Z = coder.transform(np.transpose(X))
        #print(Z.shape, Z)
        #print(np.count_nonzero(Z))
        #print(np.count_nonzero(Z, axis=0))
        pred = np.zeros((n_samples, ))
        for i in range(n_samples):
            pred[i] = np.argmax(np.dot(W, Z[i, :]))
        return pred
Example 10
        def RunSparseCodingScikit(q):
            totalTimer = Timer()

            # Load input dataset.
            inputData = np.genfromtxt(self.dataset[0], delimiter=',')
            dictionary = np.genfromtxt(self.dataset[1], delimiter=',')

            # Get all the parameters.
            opts = {}
            if "lambda" in options:
                opts["transform_alpha"] = options.pop("lambda")
            if "max_iterations" in options:
                opts["max_iter"] = options.pop("max_iterations")
            opts["transform_algorithm"] = "lars"
            opts["dictionary"] = dictionary

            if len(options) > 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            try:
                with totalTimer:
                    # Perform Sparse Coding.
                    model = SparseCoder(**opts)
                    code = model.transform(inputData)
            except Exception as e:
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
Example 11
def sparse_coding_with_LSC(Y_labelled, D, H, W, gamma, lamda):
    _Y = np.vstack((Y_labelled, np.sqrt(gamma) * H))
    _D = np.vstack((D, np.sqrt(gamma) * W))
    
    coder = SparseCoder(dictionary=_D.T,transform_alpha=lamda/2., transform_algorithm='lasso_cd')
    X =(coder.transform(_Y.T)).T
    return X
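Stacking Y on top of √γ·H and D on top of √γ·W lets a single LASSO solve penalize both the reconstruction error ‖Y − DX‖² and the label-consistency error γ‖H − WX‖². A usage sketch with random matrices (all sizes are hypothetical):

import numpy as np

rng = np.random.RandomState(0)
d, K, n, c = 20, 15, 8, 3                     # hypothetical sizes
Y = rng.randn(d, n)                           # labelled signals
D = rng.randn(d, K)                           # dictionary
H = np.eye(c)[:, rng.randint(0, c, n)]        # one-hot label matrix (c x n)
W = rng.randn(c, K)                           # linear classifier on codes

X = sparse_coding_with_LSC(Y, D, H, W, gamma=1.0, lamda=0.1)
print(X.shape)    # (K, n): one sparse code per labelled sample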
Example 12
def get_activations(stft, dico, n_nonzero_coefs=None):
    coder = SparseCoder(
        dictionary=dico.T,
        transform_n_nonzero_coefs=n_nonzero_coefs,
        transform_algorithm="lasso_cd",
        positive_code=True)
    return coder.transform(stft.T).T
Example 13
def omp_sparse(dictionary, train_data):

    dictionary = dictionary.transpose()

    w = SparseCoder(dictionary, transform_algorithm='omp')
    t = w.transform(train_data)
    return t
Example 14
def SparseRepresentation(X,D,A_mean,param,l1,transform_n_nonzero_coefs):
    if len(D.shape)>2:
        featureDim=D.shape[1]
        D=D.transpose(1,0,2).reshape((featureDim,-1))
    # par = Params()
    # par.lambda1 = param.lambda1;
    # par.lambda2 = param.lambda2;
    # D=np.hstack((D[0],D[1],D[2],D[3],D[4]))
    # A_ini = np.ones((transform_n_nonzero_coefs, X.shape[1]))
    # par.A_mean = A_mean[0]
    # if D.shape[0] >= D.shape[1]:
    #     w, v = LA.eig(D.T @ D)
    #     par.c = 1.05 * w.max()
    # else:
    #     w, v = LA.eig(D @ D.T)
    #     par.c = 1.05 * w.max()
    # opts = Coef_Update_Test(X, D, A_ini, par)
    # A_test = opts.A
    # P = LA.inv(D.T @ D + l1 * np.eye(D.shape[1])) @ D.T
    # A_test = P @ X
    coder = SparseCoder(dictionary=D.T, transform_n_nonzero_coefs=transform_n_nonzero_coefs,
                        transform_algorithm="omp")
    A_test = (coder.transform(X.T)).T
    A_test_nonzero=A_test
    # A_test_nonzero=np.empty((transform_n_nonzero_coefs, X.shape[1]))
    # for i in range(A_test.shape[1]):
    #     A_test_nonzero[:,i] = A_test[:,i][A_test[:,i] != 0]
    return A_test_nonzero
Example 15
    def reconstruct_image_color(self,
                                loading,
                                recons_resolution=1,
                                if_save=True):
        print('reconstructing given network...')
        '''
        Reconstruct the original color image using learned CP dictionary atoms
        '''
        A = self.data  # A.shape = (row, col, 3)
        CPdict = self.out(loading)
        k = self.patch_size
        W = np.zeros(shape=(3 * k**2, self.n_components))
        for j in np.arange(self.n_components):
            W[:, j] = CPdict.get('A' + str(j)).reshape(-1, 1)[:, 0]

        A_matrix = A.reshape(-1, A.shape[1])  # (row, col, 3) --> (3row, col)
        [m, n] = A_matrix.shape
        A_recons = np.zeros(shape=A.shape)
        A_overlap_count = np.zeros(shape=(A.shape[0], A.shape[1]))
        k = self.patch_size
        t0 = time()
        c = 0
        num_rows = np.floor(
            (A_recons.shape[0] - k) / recons_resolution).astype(int)
        num_cols = np.floor(
            (A_recons.shape[1] - k) / recons_resolution).astype(int)

        for i in np.arange(0, A_recons.shape[0] - k, recons_resolution):
            for j in np.arange(0, A_recons.shape[1] - k, recons_resolution):
                patch = A[i:i + k, j:j + k, :]
                patch = patch.reshape((-1, 1))
                coder = SparseCoder(dictionary=W.T,
                                    transform_n_nonzero_coefs=None,
                                    transform_alpha=1,
                                    transform_algorithm='lasso_lars',
                                    positive_code=True)
                # transform_alpha is the L1 penalty; too large a value (e.g. 2) can drive all codes to zero
                code = coder.transform(patch.T)
                patch_recons = np.dot(W, code.T).T
                patch_recons = patch_recons.reshape(k, k, 3)

                # now paint the reconstruction canvas
                for x in itertools.product(np.arange(k), repeat=2):
                    c = A_overlap_count[i + x[0], j + x[1]]
                    A_recons[i + x[0], j +
                             x[1], :] = (c * A_recons[i + x[0], j + x[1], :] +
                                         patch_recons[x[0], x[1], :]) / (c + 1)
                    A_overlap_count[i + x[0], j + x[1]] += 1

                # progress status
                print('reconstructing (%i, %i)th patch out of (%i, %i)' %
                      (i / recons_resolution, j / recons_resolution, num_rows,
                       num_cols))
        print('Reconstructed in %.2f seconds' % (time() - t0))
        print('A_recons.shape', A_recons.shape)
        if if_save:
            np.save('Video_dictionary/video_recons_color', A_recons)
        plt.imshow(A_recons)
        return A_recons
Example 16
def parallel_sc(args):

    (dico, p) = args
    coder = SparseCoder(dictionary=dico, transform_algorithm='omp')
    #by default, number of non zero coefficients is 0.1 * n_features
    #Zeyde et al.: 3 non zero coefficients !
    code = coder.transform(p).astype(np.float32)
    return code
Example 17
def test_sparse_coder_dtype_match(data_type, transform_algorithm):
    # Verify that transform preserves dtype in SparseCoder
    # (data_type/transform_algorithm come from pytest parametrization;
    # n_features and X are module-level globals in the original test file)
    n_components = 6
    rng = np.random.RandomState(0)
    dictionary = rng.randn(n_components, n_features)
    coder = SparseCoder(dictionary.astype(data_type),
                        transform_algorithm=transform_algorithm)
    code = coder.transform(X.astype(data_type))
    assert code.dtype == data_type
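The test relies on pytest parametrization and module-level globals from scikit-learn's test suite; a self-contained variant checking the same dtype-preservation property might look like:

import numpy as np
from sklearn.decomposition import SparseCoder

n_features, n_components = 8, 6
rng = np.random.RandomState(0)
dictionary = rng.randn(n_components, n_features)
X = rng.randn(10, n_features)

for dtype in (np.float32, np.float64):
    coder = SparseCoder(dictionary.astype(dtype), transform_algorithm='lasso_lars')
    assert coder.transform(X.astype(dtype)).dtype == dtype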
Example 18
    def sparse_code_proximal(self, X, W, a1, a2):
        '''
        Given data matrix X and dictionary matrix W, find
        code matrix H and noise matrix S such that
        H, S = argmin ||X - WH - S||_{F}^2 + \alpha ||H||_{1}  + \beta ||S||_{1}
        Uses proximal gradient

        G = [H \\ S']
        V = [W, b I]  (so that VG = WH + bS')

        Then solve

        min_{G} |X - VG|_{F}^2 + \alpha |G|_{1}
        = min_{H,S'} |X - WH - bS'|_{F}^2 + \alpha |H|_{1} + \alpha |S'|_{1}
        = min_{H,S} |X - WH - S|_{F}^2 + \alpha |H|_{1} + (\alpha/b) |S|_{1}

        using constrained LASSO

        args:
            X (numpy array): data matrix with dimensions: features (d) x samples (n)
            W (numpy array): dictionary matrix with dimensions: features (d) x topics (r)

        returns:
            H (numpy array): code matrix with dimensions: topics (r) x samples(n)
            S (numpy array): noise matrix with dimensions: features (d) x samples (n)
        '''

        if DEBUG:
            print('sparse_code')
            print('X.shape:', X.shape)
            print('W.shape:', W.shape, '\n')

        # initialize the SparseCoder with W as its dictionary
        # H_new = LASSO with W as dictionary
        # S_new = LASSO with id (d x d) as dictionary
        # Y_new = Y + (W H_new + S_new - S) : Dual variable

        ### Initialization
        d, n = X.shape
        r = self.n_components

        ### Augmented dictionary matrix for proximal gradient
        V = np.hstack((W, a2*np.identity(d)))

        ### Proximal sparse coding by constrained LASSO
        coder = SparseCoder(dictionary=V.T, transform_n_nonzero_coefs=None,
                            transform_alpha=a1, transform_algorithm='lasso_lars', positive_code=True)
        # transpose to undo the preceding transpose on X
        G = coder.transform(X.T).T

        ### Read off H and S from G
        H = G[0:r, :]
        S = a2*G[r:, :]

        return H, S
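The same augmented-dictionary trick can be sketched outside the class: hstack W with a2·I so that one LASSO solve returns both the codes H and a sparse noise estimate S (random data; a1 and a2 are arbitrary):

import numpy as np
from sklearn.decomposition import SparseCoder

rng = np.random.RandomState(0)
d, r, n = 20, 5, 30
W = np.abs(rng.randn(d, r))
X = W @ np.abs(rng.randn(r, n)) + 0.5 * (rng.rand(d, n) < 0.02)  # data + sparse noise

a1, a2 = 0.1, 1.0
V = np.hstack((W, a2 * np.identity(d)))          # augmented dictionary [W, a2*I]
coder = SparseCoder(dictionary=V.T, transform_alpha=a1,
                    transform_algorithm='lasso_lars', positive_code=True)
G = coder.transform(X.T).T
H, S = G[:r, :], a2 * G[r:, :]                   # codes and sparse-noise estimate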
Example 19
def sc_result_analysis():
    """
        对稀疏编码的结果进行分析i
    :return:
    """
    sc_file = open('./tmp_file/30_dictionary.pickle', 'rb')
    sc_list = cPickle.load(sc_file)

    classified_file = open('./tmp_file/30_class_result.pickle', 'rb')
    (classified_feature, classified_patch) = cPickle.load(classified_file)

    model_file = open('./tmp_file/30_kmeans_pca_model.pickle', 'rb')
    (k_means, pca) = cPickle.load(model_file)

    sc_file.close()
    classified_file.close()
    model_file.close()

    # ========================================================================
    for i in range(5):
        k = i

        #v_feature = pca.transform(classified_feature[1][k]).reshape((-1,))
        v_feature = classified_feature[3][k]
        v_patch = classified_patch[3][k]

        feature_dict = sc_list[0][:, :144]
        patch_dict = sc_list[0][:, 144:]

        #v_feature = feature_dict[0]
        #v_patch = patch_dict[0]

        coder = SparseCoder(dictionary=feature_dict, transform_algorithm='omp',
                            transform_alpha=0.01, n_jobs=2, transform_n_nonzero_coefs=1)

        weight = coder.transform(v_feature)

        v_patch = v_patch.reshape((9, 9))
        result = np.dot(weight, patch_dict).reshape((9, 9))

        mask = weight != 0
        print(weight[mask])
        mask = mask[0]

        print(len(patch_dict[mask]))
        print(len(patch_dict[mask]))
        patch_show(patch_dict[mask],[0,0,0.45,0.45],1)

        ax2 = plt.axes([0, 0.5, 0.45, 0.45])
        ax2.imshow(result, interpolation="none", cmap=cm.gray)

        ax2 = plt.axes([0.5, 0.5, 0.45, 0.45])
        ax2.imshow(v_patch, interpolation="none", cmap=cm.gray)

        plt.show()
Example 20
def train(DWA_all, D_all, W_all, A_all, Cs, labels, file_paths,
          inds_of_file_path, train_number, start_init_number, update_times,
          update_index, n_classes, n_atoms, n_features, lambda_init,
          the_lambda, transform_n_nonzero_coefs, omp_tag):
    for j in range(n_classes):
        if j == 0:
            print(update_index)
            sys.stdout.flush()
        coder = SparseCoder(
            dictionary=D_all.T,
            transform_n_nonzero_coefs=transform_n_nonzero_coefs,
            transform_algorithm='omp')
        label_indexs_for_update = inds_of_file_path[j][:train_number]
        new_index = [
            label_indexs_for_update[(update_index + start_init_number) %
                                    train_number]
        ]
        new_label = labels[new_index][0]
        lab_index = j
        im_vec = load_img(file_paths[new_index][0])
        im_vec = im_vec / 255.
        new_y = np.array(im_vec, dtype=float)
        new_y = preprocessing.normalize(new_y.T, norm='l2').T
        new_y = norm_Ys(new_y)
        new_y = new_y.reshape(n_features, 1)
        new_h = np.zeros((n_classes, 1))
        new_h[lab_index, 0] = 1
        new_q = np.zeros((n_atoms * n_classes, 1))
        new_q[n_atoms * lab_index:n_atoms * (lab_index + 1), 0] = 1
        new_yhq = np.vstack((new_y, new_h, new_q))
        new_x = None
        if omp_tag == "true":
            new_x = (coder.transform(new_y.T)).T
        if omp_tag == "wzz":
            new_x = transform(D_all, new_y, transform_n_nonzero_coefs)
        the_C = Cs
        the_u = (1 / the_lambda) * np.dot(the_C, new_x)
        gamma = 1 / (1 + np.dot(new_x.T, the_u))
        the_r = new_yhq - np.dot(DWA_all, new_x)
        new_C = (1 / the_lambda) * the_C - gamma * np.dot(the_u, the_u.T)
        new_DWA = DWA_all + gamma * np.dot(the_r, the_u.T)
        DWA_all = new_DWA
    part_lambda = 1 - update_index / update_times
    the_lambda = 1 - (1 - lambda_init) * part_lambda ** 3
    D_all = DWA_all[0:D_all.shape[0], :]
    W_all = DWA_all[D_all.shape[0]:D_all.shape[0] + W_all.shape[0], :]
    A_all = DWA_all[D_all.shape[0] + W_all.shape[0]:, :]
    D_all = preprocessing.normalize(D_all.T, norm='l2').T
    W_all = preprocessing.normalize(W_all.T, norm='l2').T
    A_all = preprocessing.normalize(A_all.T, norm='l2').T
    DWA_all = np.vstack((D_all, W_all, A_all))
    return DWA_all, D_all, W_all, A_all, the_lambda
Example 21
def sparse_codifier(y,D, transform_algo = 'omp', transform_n_nonzero_coefs = 2):
    """
    Encodes an input vector y into an output sparse vector x, 
    based on a given dictionary D
    Retired, to delete.
    """
    print('y.shape = %s ' % (y.shape,))
    print('Dictionary shape = %s' % (D.shape,))
    coder = SparseCoder(dictionary=D, transform_algorithm=transform_algo,
                        transform_n_nonzero_coefs=transform_n_nonzero_coefs)
    x = coder.transform(y)
    print('x.shape = %s' % (x.shape,))
    
    return x
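Usage sketch (random dictionary and sample; with OMP, at most transform_n_nonzero_coefs atoms are selected):

import numpy as np

rng = np.random.RandomState(0)
D = rng.randn(12, 30)          # 12 atoms x 30 features
y = rng.randn(1, 30)           # one sample to encode
x = sparse_codifier(y, D)      # defaults: OMP, 2 nonzero coefficients
print(np.count_nonzero(x))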
Example 23
	def predict_svm(self, img):
		gray  = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
		gray  = cv2.resize(gray, (100,100))
		gray  = np.array(gray).reshape(1,100*100)
		#X = self.pca.transform(gray)

		D = np.array(self.pca.components_)
		dictionary = SparseCoder(D, transform_algorithm='omp', transform_n_nonzero_coefs=10, transform_alpha=None)
		features = dictionary.transform(gray)
		label = self.svm.predict(features)
		score = None

		return label, score
Example 24
    def reconstruct_image_color(self, path, recons_resolution=1):
        print('reconstructing given network...')
        '''
        Note: For WAN data, the algorithm reconstructs the normalized WAN matrix A/np.max(A). 
        Scale the reconstructed matrix B by np.max(A) and compare with the original network. 
        '''
        A = self.read_img_as_array(path)  # A.shape = (row, col, 3)
        A_matrix = A.reshape(-1, A.shape[1])  # (row, col, 3) --> (3row, col)
        [m, n] = A_matrix.shape
        A_recons = np.zeros(shape=A.shape)
        A_overlap_count = np.zeros(shape=(A.shape[0], A.shape[1]))
        k = self.patch_size
        t0 = time()
        c = 0
        num_rows = np.floor(
            (A_recons.shape[0] - k) / recons_resolution).astype(int)
        num_cols = np.floor(
            (A_recons.shape[1] - k) / recons_resolution).astype(int)

        for i in np.arange(0, A_recons.shape[0] - k, recons_resolution):
            for j in np.arange(0, A_recons.shape[1] - k, recons_resolution):
                patch = A[i:i + k, j:j + k, :]
                patch = patch.reshape((-1, 1))
                # print('patch.shape', patch.shape)
                coder = SparseCoder(dictionary=self.W.T,
                                    transform_n_nonzero_coefs=None,
                                    transform_alpha=1,
                                    transform_algorithm='lasso_lars',
                                    positive_code=True)
                # transform_alpha is the L1 penalty; too large a value (e.g. 2) can drive all codes to zero
                code = coder.transform(patch.T)
                patch_recons = np.dot(self.W, code.T).T
                patch_recons = patch_recons.reshape(k, k, 3)

                # now paint the reconstruction canvas
                for x in itertools.product(np.arange(k), repeat=2):
                    c = A_overlap_count[i + x[0], j + x[1]]
                    A_recons[i + x[0], j +
                             x[1], :] = (c * A_recons[i + x[0], j + x[1], :] +
                                         patch_recons[x[0], x[1], :]) / (c + 1)
                    A_overlap_count[i + x[0], j + x[1]] += 1

                # progress status
                print('reconstructing (%i, %i)th patch out of (%i, %i)' %
                      (i / recons_resolution, j / recons_resolution, num_rows,
                       num_cols))
        print('Reconstructed in %.2f seconds' % (time() - t0))
        print('A_recons.shape', A_recons.shape)
        np.save('Image_dictionary/img_recons_color', A_recons)
        plt.imshow(A_recons)
        return A_recons
Example 25
    def __sparse_encode(self, D, test_X):
        """

        Z.shape = (test_size, atoms_size=dict_size)

        """

        coder = SparseCoder(dictionary=D,
                            transform_algorithm='lasso_cd',
                            transform_alpha=self.coder_alpha)
        coder.fit(test_X)  # SparseCoder.fit is a no-op; transform does the work
        Z = coder.transform(test_X)

        return Z
Example 26
    def reconstruct_network(self, path, recons_iter=100):
        print('reconstructing given network...')
        '''
        Note: For WAN data, the algorithm reconstructs the normalized WAN matrix A/np.max(A). 
        Scale the reconstructed matrix B by np.max(A) and compare with the original network. 
        '''

        A = self.A
        [N, N] = A.shape
        A_recons = np.zeros(shape=(N, N))
        A_overlap_count = np.zeros(shape=(N, N))
        B = self.path_adj(self.k1, self.k2)
        k = self.k1 + self.k2 + 1  # size of the network patch
        x0 = np.random.choice(np.arange(0, N))
        emb = self.tree_sample(B, x0)
        t0 = time()
        c = 0

        for t in np.arange(recons_iter):
            patch, emb = self.get_single_patch_glauber(B, emb)
            coder = SparseCoder(dictionary=self.W.T,
                                transform_n_nonzero_coefs=None,
                                transform_alpha=0,
                                transform_algorithm='lasso_lars',
                                positive_code=True)
            # transform_alpha is the L1 penalty; too large a value can drive all
            # codes to zero (observed here when sparse coding a single array)
            code = coder.transform(patch.T)
            patch_recons = np.dot(self.W, code.T).T
            patch_recons = patch_recons.reshape(k, k)
            for x in itertools.product(np.arange(k), repeat=2):
                a = emb[x[0]]
                b = emb[x[1]]
                j = A_overlap_count[a, b]
                A_recons[a, b] = (j * A_recons[a, b] +
                                  patch_recons[x[0], x[1]]) / (j + 1)
                # A_recons[a,b] = A_recons[a,b] + patch_recons[x[0], x[1]]
                A_overlap_count[a, b] += 1

            # progress status
            if (100 * t) % recons_iter == 0:
                print(t / recons_iter * 100)

        print('Reconstructed in %.2f seconds' % (time() - t0))
        np.save(
            'Network_dictionary/WAN/twain_recons' + "_" + str(self.k1) +
            str(self.k2) + "_" + str(self.n_components), A_recons)
        return A_recons
Example 27
def generateWalk2vecSC(D, times, n, vs, l):
    coder = SparseCoder(dictionary=D.components_, transform_alpha=l)
    path = input("Enter filename to save datasets...\n")
    with open(path, 'w') as f:
        try:
            for i in range(times):
                v = coder.transform(vs[i * n:(i + 1) * n])
                v = np.average(v, axis=0)
                f.write(str(v.tolist()) + '\n')
                sys.stdout.write("\r{}/{} finished".format(i + 1, times))
        except Exception:
            os.remove(path)
            traceback.print_exc()
            exit(1)

    print("\rData successfully generated")
Example 28
def test_sparse_coder_estimator_clone():
    # (n_features and n_samples are module-level globals in the original test file)
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V**2, axis=1)[:, np.newaxis]
    coder = SparseCoder(dictionary=V,
                        transform_algorithm='lasso_lars',
                        transform_alpha=0.001)
    cloned = clone(coder)
    assert id(cloned) != id(coder)
    np.testing.assert_allclose(cloned.dictionary, coder.dictionary)
    assert id(cloned.dictionary) != id(coder.dictionary)
    assert cloned.n_components_ == coder.n_components_
    assert cloned.n_features_in_ == coder.n_features_in_
    data = np.random.rand(n_samples, n_features).astype(np.float32)
    np.testing.assert_allclose(cloned.transform(data), coder.transform(data))
Example 29
    def sparse_code_affine(self, X, W, a1, num_blocks):
        '''
        Given data matrix X and dictionary matrix W, find
        code matrix H and affine translations for each blocks in X so that X \approx WH + block-translation.
        For the case when X has a single block, this is X \approx WH + bI.
        Use alternating optimization -- fix b, find H by LASSO; fix H, find b by MSE, which gives b = mean(X-WH).

        args:
            X (numpy array): data matrix with dimensions: features (d) x samples (n)
            W (numpy array): dictionary matrix with dimensions: features (d) x topics (r)

        returns:
            H (numpy array): code matrix with dimensions: topics (r) x samples(n)
            S (numpy array): noise matrix with dimensions: features (d) x samples (n). (d) rows are partitioned into
            "num_blocks" blocks, in which the entries of S are constant.
        '''

        if DEBUG:
            print('sparse_code')
            print('X.shape:', X.shape)
            print('W.shape:', W.shape, '\n')

        # initialize the SparseCoder with W as its dictionary
        # H_new = LASSO with W as dictionary
        # S_new = LASSO with id (d x d) as dictionary
        # Y_new = Y + (W H_new + S_new - S) : Dual variable
        block_iter = 1
        nb = num_blocks
        H = []
        S = np.zeros(shape=X.shape)
        for step in np.arange(block_iter):
            ### Optimize H by constrained LASSO
            coder = SparseCoder(dictionary=W.T, transform_n_nonzero_coefs=None,
                                transform_alpha=a1, transform_algorithm='lasso_lars', positive_code=True)
            H = coder.transform((X - S).T)
            H = H.T

            ### Optimize S by solving MSE
            l = np.floor(X.shape[0]/nb).astype(int)
            for i in np.arange(l):
                Y = X - W @ H
                print('Y', np.sum(Y))
                print('S', np.sum(S))
                S[nb*i : nb*(i+1), 0] = np.mean(Y[nb*i : nb*(i+1), 0])  # solution to block-MSE
            S[nb * l:, 0] = np.mean((X - W @ H)[nb * l:, 0])  # solution to block-MSE

        return H, S
Example 30
    def predict_joint_single(self, data, a1):
        k = self.patch_size
        L = self.prediction_length
        A = data  # A.shape = (self.data.shape[0], k-L, self.data.shape[2])
        # A_recons = np.zeros(shape=(A.shape[0], k, A.shape[2]))
        # W_tensor = self.W.reshape((k, A.shape[0], -1))
        # print('A.shape', A.shape)
        W_tensor = self.W.reshape(
            (self.data.shape[0], k, self.data.shape[2], -1))
        # print('W.shape', W_tensor.shape)

        # for missing data, not needed for the COVID-19 data set
        # extract only rows of nonnegative values (disregarding missing entries) (negative = N/A)

        J = np.where(np.min(A, axis=(0, 1)) >= -1)
        A_pos = A[:, :, J]
        # print('A_pos', A_pos)
        # print('np.min(A)', np.min(A))
        W_tensor = W_tensor[:, :, J, :]
        W_trimmed = W_tensor[:, 0:k - L, :, :]
        W_trimmed = W_trimmed.reshape((-1, self.n_components))

        patch = A_pos

        # print('patch', patch)

        patch = patch.reshape((-1, 1))
        # print('patch.shape', patch.shape)

        # print('patch', patch)

        coder = SparseCoder(dictionary=W_trimmed.T,
                            transform_n_nonzero_coefs=None,
                            transform_alpha=a1,
                            transform_algorithm='lasso_lars',
                            positive_code=True)
        # alpha = L1 regularization parameter
        code = coder.transform(patch.T)
        patch_recons = np.dot(
            self.W,
            code.T).T  # This gives prediction on the last L missing entries
        patch_recons = patch_recons.reshape(-1, k, A.shape[2])

        # now paint the reconstruction canvas
        # only add the last predicted value
        A_recons = patch_recons[:, k - 1, :]
        return A_recons[:, np.newaxis, :]
Example 31
def genD_X(p):

    Site = p[0]
    Y = p[1][0]
    D = p[1][1]

    n_nonzero = 1
    algo = 'omp'
    #Y_mod=np.reshape(Y, (1,N))
    Y_mod = Y.T
    D_mod = D.T
    #D = np.random.randn(K,N)
    coder = SparseCoder(dictionary=D_mod, transform_n_nonzero_coefs=n_nonzero, transform_alpha=None, transform_algorithm=algo)
    X = coder.transform(Y_mod)
    X_mod = X.T
    #X-returned has shape (k,s)
    #shape of Y is (n,s) while D is (n,k)
    return (Site, (Y,D,X_mod))
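Usage sketch (the input is a (site, (Y, D)) pair; shapes follow the comments above and are otherwise arbitrary):

import numpy as np

rng = np.random.RandomState(0)
Y = rng.randn(30, 5)            # (n, s): 5 signals of dimension 30
D = rng.randn(30, 8)            # (n, k): 8 atoms
site, (_, _, X) = genD_X(('site-0', (Y, D)))
print(X.shape)                  # (k, s) codes, one nonzero per column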
Example 32
    def train_one_time(i,incoherent_key):
        if i % 10 == 0:
            # print(i)
            sys.stdout.flush()
        coder = SparseCoder(dictionary=D.T, transform_n_nonzero_coefs=transform_n_nonzero_coefs,
                            transform_algorithm="omp")
        the_B = Bs
        the_C = Cs
        im_vec = image_vecs[:, i%n_data]
        new_y = np.array(im_vec, dtype=float)
        new_y = new_y.reshape(n_features, 1)
        new_x = (coder.transform(new_y.T)).T
        # new_x=transform(D,new_y,transform_n_nonzero_coefs)
        new_B = the_B + np.dot(new_y, new_x.T)
        new_C = the_C - (np.matrix(the_C) * np.matrix(new_x) * np.matrix(new_x.T) * np.matrix(the_C)) / (
                    np.matrix(new_x.T) * np.matrix(the_C) * np.matrix(
                new_x) + 1)  # matrix inversion lemma(Woodbury matrix identity)
        Bs[:] = new_B
        Cs[:] = new_C
        new_D = np.dot(new_B, new_C)
        D_diff=new_D-D
        D[:] = copy.deepcopy(new_D)
        Ds[:] = D
        Ds[:] = preprocessing.normalize(Ds.T, norm='l2').T
        if i==incoherent_key:
            pass
            # print("Start reduce coherence")
            # D_all = D
            # coder = SparseCoder(dictionary=D_all.T, transform_n_nonzero_coefs=transform_n_nonzero_coefs,
            #                     transform_algorithm="omp")
            # the_X = (coder.transform(image_vecs.T)).T
            # D_new_all = incoherent_3000(D_all, image_vecs, the_X, 1)
            # D[:]=D_new_all
            # Ds[:] = D
            # Ds[:] = preprocessing.normalize(Ds.T, norm='l2').T

        # print(abs(D_diff).max())
        # print(abs(D_diff).mean())
        # print()
        # coder = SparseCoder(dictionary=Ds.T, transform_n_nonzero_coefs=transform_n_nonzero_coefs,
        #                     transform_algorithm="omp")
        # X_all = (coder.transform(image_vecs.T)).T
        # return Ds,X_all
        return Ds
Example 33
def tracklet_classify(A, pca, D, knn, clf_coding):
    encode_fea = np.zeros((len(A),len(D)))
    for n in range(len(A)):
        pca_fea = pca.transform(A[n])
        dist = distance.cdist(pca_fea, D, 'euclidean')
        x = np.zeros((len(pca_fea),len(D)))
        for k in range(len(dist)):
            sort_idx = np.argsort(dist[k,:])
            temp_D = D[sort_idx[0:knn],:]
            temp_coder = SparseCoder(dictionary=temp_D, transform_n_nonzero_coefs=10, 
                                 transform_alpha=0.05, transform_algorithm='lasso_lars')
            #import pdb; pdb.set_trace()
            xx = np.zeros((1,D.shape[1]))
            xx[:,:] = pca_fea[k,:]
            temp_x = temp_coder.transform(xx)
            x[k,sort_idx[0:knn]] = temp_x

        encode_fea[n,:] = np.max(x, axis=0)
    pred_set_label = clf_coding.predict(encode_fea)
    return pred_set_label
Example 34
def findCoeffsMouth(points, keyposes):
    
    target = np.array(points).astype(float)
    target_mouth = target[48:68]
    target_mouth = np.reshape(target_mouth, (1,40))
    
    dict_2d = np.array(keyposes).astype(float)
    dict_2d_mouth=[]
    
    for i in range(dict_2d.shape[0]):
        dict_2d_mouth.append(dict_2d[i][48:68])

    dict_2d_mouth = np.array(dict_2d_mouth)
    dict_2d_mouth = np.reshape(dict_2d_mouth, (dict_2d.shape[0], 40))
 
    coder = SparseCoder(dictionary=dict_2d_mouth, transform_n_nonzero_coefs=None,
                    transform_alpha=10, transform_algorithm='lasso_lars')

    coeffs = coder.transform(target_mouth)

    return coeffs
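Usage sketch (random 68-point landmark sets; the 48:68 slice selects the 20 mouth landmarks):

import numpy as np

rng = np.random.RandomState(0)
points = rng.randn(68, 2)               # one detected landmark set
keyposes = rng.randn(12, 68, 2)         # hypothetical keypose library
coeffs = findCoeffsMouth(points, keyposes)
print(coeffs.shape)                     # (1, 12): one weight per keypose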
Example 35
def denoiseImg(img, D):
    """Denoise prediction patches img using the dictionary D."""
    
    img_width = img.shape[0]
    img_height = img.shape[1]
    stride = 1  # denoise overlapping patches
    result = np.zeros((img_width, img_height))
    counts = np.zeros((img_width, img_height))
    
    coder = SparseCoder(dictionary=D, transform_n_nonzero_coefs=1, transform_alpha=1, transform_algorithm='omp')

    for i in range(0, img_height - const.DICT_PATCH_SIZE[1] + 1, stride):
        for j in range(0, img_width - const.DICT_PATCH_SIZE[0] + 1, stride):
            ptc = img[j:j+const.DICT_PATCH_SIZE[0], i:i+const.DICT_PATCH_SIZE[1]]
            x = coder.transform(ptc.reshape(1, -1))
            x = np.ravel(np.dot(x, D))
            x = x.reshape(const.DICT_PATCH_SIZE[0], const.DICT_PATCH_SIZE[1])
            result[j:j+const.DICT_PATCH_SIZE[0], i:i+const.DICT_PATCH_SIZE[1]] += x
            counts[j:j+const.DICT_PATCH_SIZE[0], i:i+const.DICT_PATCH_SIZE[1]] += np.ones(const.DICT_PATCH_SIZE)

    l = 0.0  # blending weight toward the original (noisy) image
    return (result + l * img) / (counts + l)
Example 36
def slp_train_svm(folder, output):
	path = str.format('{0}/*', folder)
	data = []
	cls  = []
	c    = 0
	for folder in glob.glob(path):
		for img_path in glob.glob(folder + '/*.jpg'):
			print('Loading: ', img_path)
			img = cv2.imread(img_path, 0)
			img = np.array(img).reshape(img.shape[0]*img.shape[1])
			data.append(img)
			cls.append(c)
		c = c + 1
	data = np.array(data)
	pca = PCA(n_components=10)
	pca.fit(data)
	
	D = np.array(pca.components_)
	dictionary = SparseCoder(D, transform_algorithm='omp', transform_n_nonzero_coefs=10, transform_alpha=None)
	features = dictionary.transform(data)
	svm = LinearSVC()
	svm.fit(features, cls)
	pickle.dump(svm, open(output, 'wb'))
Example 37
    def RunSparseCodingScikit(q):
      totalTimer = Timer()

      # Load input dataset.
      inputData = np.genfromtxt(self.dataset[0], delimiter=',')
      dictionary = np.genfromtxt(self.dataset[1], delimiter=',')

      # Get all the parameters.
      l = re.search(r"-l (\d+)", options)
      l = 0 if not l else int(l.group(1))

      try:
        with totalTimer:
          # Perform Sparse Coding.
          model = SparseCoder(dictionary=dictionary, transform_algorithm='lars',
              transform_alpha=l)
          code = model.transform(inputData)
      except Exception as e:
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
Example 38
def scskl_reconstruction(data,mask,D):
  output = np.zeros(data.shape)
  fmap = np.zeros((D.shape[0]))
  #fdata = np.zeros((data.shape[0],data.shape[1],data.shape[2],D.shape[0]))

  px = int(np.around(np.power(D.shape[1], 1/3))) # patch size (assumed to be isotropic); np.int is removed in recent NumPy
  hpx = np.floor(px/2).astype(int)
  nblock = 2 # number of block per dimension
  subsize = np.ceil(np.array(data.shape) / nblock).astype(int)
  
  med = np.median(data)
  currentblock = 1
  
  for x in range(np.ceil(data.shape[0]/subsize[0]).astype(int)):
    xmin = x*subsize[0]
    xmax = np.min((data.shape[0],(x+1)*subsize[0]))
    for y in range(np.ceil(data.shape[1]/subsize[1]).astype(int)):
      ymin = y*subsize[1]
      ymax = np.min((data.shape[1],(y+1)*subsize[1]))
      for z in range(np.ceil(data.shape[2]/subsize[2]).astype(int)):  
        zmin = z*subsize[2]
        zmax = np.min((data.shape[2],(z+1)*subsize[2]))
        
        print('Processing block : ',currentblock)
        currentblock+=1
        
        #Enlarge subimage to take into account block effect due to non-overlapping patches
        xmin2 = np.max((0,xmin-hpx))
        xmax2 = np.min((data.shape[0],xmax+hpx))
        ymin2 = np.max((0,ymin-hpx))
        ymax2 = np.min((data.shape[1],ymax+hpx))
        zmin2 = np.max((0,zmin-hpx))
        zmax2 = np.min((data.shape[2],zmax+hpx))
      
        subdata = data[xmin2:xmax2,ymin2:ymax2,zmin2:zmax2]
        submask = mask[xmin2:xmax2,ymin2:ymax2,zmin2:zmax2]
        p = mp.array_to_patches(subdata,patch_shape=(px,px,px),normalization=False)
        pm = mp.array_to_patches(submask,patch_shape=(px,px,px),normalization=False)
        #remove patch we dont want to process
        index = ~np.all(pm==0,axis=1)
        subp = p[index]
        subp -= med
        
        if subp.shape[0] > 0:
          print('Number of patches to process: ',subp.shape[0])
          #Currently, there is a bug when using n_jobs>1 (https://github.com/scikit-learn/scikit-learn/issues/5956)
          coder = SparseCoder(dictionary=D, transform_algorithm='omp')
          code = coder.transform(subp).astype(np.float32)
          fmap += np.sum((np.fabs(code)>0),axis=0)
          subp = np.dot(code, D)          
          subp += med 
          p[index] = subp 
          suboutput = mp.patches_to_array(patches=p, patch_shape=(px,px,px), array_shape=subdata.shape)
          
          tmpoutput = np.empty(data.shape)
          tmpoutput[xmin2:xmax2,ymin2:ymax2,zmin2:zmax2]= suboutput      
          output[xmin:xmax,ymin:ymax,zmin:zmax] = tmpoutput[xmin:xmax,ymin:ymax,zmin:zmax]
          
#          for a in range(D.shape[0]):
#            for s in range(subp.shape[0]):
#              subp[s,:] = code[s,a]
#            p.fill(0)
#            p[index] = subp
#            fa = mp.patches_to_array(patches=p, patch_shape=(px,px,px), array_shape=subdata.shape)
#            to = np.empty(data.shape)
#            to[xmin2:xmax2,ymin2:ymax2,zmin2:zmax2]= fa      
#            fdata[xmin:xmax,ymin:ymax,zmin:zmax,a] = to[xmin:xmax,ymin:ymax,zmin:zmax]
              
            
          
  #plt.bar(range(0,D.shape[0]), fmap)
  #print('points in mask: ',np.sum(mask!=0))
  #print('Number of non zero elements: ',np.sum(fmap)/np.sum(mask!=0))
  #plt.show()
      
#  return (output,fdata)
  return output
Example 39
    # Fragment: gammatone_matrix and erb_space are defined elsewhere in the source
    # script, and scikits.talkbox is a Python 2-era package (a NumPy stand-in for
    # segment_axis is sketched after this example).
    from scipy.io import wavfile
    from scikits.talkbox import segment_axis
    resolution = 160
    step = 8
    b = 1.019
    n_channels = 64
    overlap = 80
    
    # Compute a multiscale dictionary
    
    D_multi = np.r_[tuple(gammatone_matrix(b, fc, resolution, step) for
                          fc in erb_space(150, 8000, n_channels))]

    # Load test signal
    fs, y = wavfile.read('/home/jfsantos/data/TIMIT/TRAIN/DR1/FCJF0/SA1.WAV')
    y = y / 2.0**15
    Y = segment_axis(y, resolution, overlap=overlap, end='pad')
    Y = np.hanning(resolution) * Y

    # segments should be windowed and overlap
    
    coder = SparseCoder(dictionary=D_multi, transform_n_nonzero_coefs=None, transform_alpha=1., transform_algorithm='omp')
    X = coder.transform(Y)
    density = len(np.flatnonzero(X))
    out = np.zeros((int(np.ceil(len(y) / resolution)) + 1) * resolution)
    for k in range(0, len(X)):
        idx = range(k*(resolution-overlap),k*(resolution-overlap) + resolution)
        out[idx] += np.dot(X[k], D_multi)
    squared_error = np.sum((y - out[0:len(y)]) ** 2)
    wavfile.write('reconst_%d_%d.wav'%(resolution,overlap), fs, np.asarray(out, dtype=np.float32))
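scikits.talkbox no longer installs on modern Python; a minimal NumPy stand-in for the segment_axis call above (assuming hop = length − overlap and end='pad' meaning the tail is zero-padded) could be:

import numpy as np

def segment_axis(a, length, overlap=0, end='pad'):
    """Split 1-D array a into overlapping frames of `length` samples (zero-padded tail)."""
    hop = length - overlap
    n_frames = max(1, int(np.ceil((len(a) - overlap) / hop)))
    padded = np.zeros(overlap + n_frames * hop)
    padded[:len(a)] = a
    return np.stack([padded[i * hop:i * hop + length] for i in range(n_frames)])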
Example 40
# (title, transform_algorithm, transform_alpha, transform_n_nonzero_coefs, color)
estimators = [('OMP', 'omp', None, 15, 'navy'),
              ('Lasso', 'lasso_cd', 2, None, 'turquoise'), ]
lw = 2

plt.figure(figsize=(13, 6))
for subplot, (D, title) in enumerate(zip((D_fixed, D_multi),
                                         ('fixed width', 'multiple widths'))):
    plt.subplot(1, 2, subplot + 1)
    plt.title('Sparse coding against %s dictionary' % title)
    plt.plot(y, lw=lw, linestyle='--', label='Original signal')
    # Do a wavelet approximation
    for title, algo, alpha, n_nonzero, color in estimators:
        coder = SparseCoder(dictionary=D, transform_n_nonzero_coefs=n_nonzero,
                            transform_alpha=alpha, transform_algorithm=algo)
        x = coder.transform(y.reshape(1, -1))
        density = len(np.flatnonzero(x))
        x = np.ravel(np.dot(x, D))
        squared_error = np.sum((y - x) ** 2)
        plt.plot(x, color=color, lw=lw,
                 label='%s: %s nonzero coefs,\n%.2f error'
                 % (title, density, squared_error))

    # Soft thresholding debiasing
    coder = SparseCoder(dictionary=D, transform_algorithm='threshold',
                        transform_alpha=20)
    x = coder.transform(y.reshape(1, -1))
    _, idx = np.where(x != 0)
    x[0, idx], _, _, _ = np.linalg.lstsq(D[idx, :].T, y, rcond=None)
    x = np.ravel(np.dot(x, D))
    squared_error = np.sum((y - x) ** 2)
Example 41
def encode_kmeans_sparsecode(df, km, algo='lasso_cd', alpha=1, split=False):
    centroids = km.cluster_centers_
    D = [centroids[i]/np.linalg.norm(centroids[i]) for i in range(len(centroids))]
    D = np.array(D)
    sc = SparseCoder(D, transform_algorithm=algo, transform_alpha=alpha, split_sign=split)
    return pd.DataFrame(sc.transform(df))
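Usage sketch (random data; the KMeans centroids become the dictionary rows after L2 normalization):

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

rng = np.random.RandomState(0)
df = pd.DataFrame(rng.rand(100, 16))
km = KMeans(n_clusters=8, n_init=10, random_state=0).fit(df)
codes = encode_kmeans_sparsecode(df, km, alpha=0.5)
print(codes.shape)   # (100, 8): one code per row, one column per centroid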
Example 42
class SparseCoding(object):

    def __init__(self, log_lev='INFO', sparse_dim_rat=None, name='',
                 dist_beta=0.1, dist_sigma=0.005, display=0):
        LOG.setLevel(log_lev)

        self.name = name
        self.codebook_comps = None
        self.active_set = None
        self.min_coeff = max([1,
            co.CONST['sparse_fss_min_coeff']])
        self.min_coeff_rat = co.CONST['sparse_fss_min_coeff_rat']
        self.gamma = co.CONST['sparse_fss_gamma']
        self.rat = None
        if isinstance(self.gamma, str):
            if self.gamma.startswith('var'):
                try:
                    self.rat = [float(s) for s in self.gamma.split() if
                                co.type_conv.isfloat(s)][0]
                except IndexError:
                    self.rat = None
        self.inp_features = None
        self.sparse_features = None
        self.basis_constraint = 1
        self.inv_codebook_comps = None
        self.res_codebook_comps = None
        self.max_iter = 500
        self.dict_max_iter = 300
        self.display = display
        self.prev_err = 0
        self.curr_error = 0
        self.allow_big_vals = False
        self.sparse_dim_rat = sparse_dim_rat
        if sparse_dim_rat is None:
            self.sparse_dim_rat = co.CONST['sparse_dim_rat']
        self.theta = None
        self.prev_sparse_feats = None
        self.flush_flag = False
        self.sparse_feat_list = None
        self.inp_feat_list = None
        self.codebook = None
        self.time = []

    def flush_variables(self):
        '''
        Empty variables
        '''
        self.active_set = None
        self.theta = None
        self.codebook_comps = None
        self.inp_features = None
        self.inp_feat_list = None
        self.sparse_features = None
        self.flush_flag = True
        self.res_codebook_comps = None
        self.prev_err = 0
        self.curr_error = 0
        self.lbds = 0.5*np.ones(self.sparse_dim)

    def initialize(self, feat_dim,
                   init_codebook_comps=None):
        '''
        Initialises B dictionary and s
        '''
        self.sparse_dim = self.sparse_dim_rat * feat_dim
        if init_codebook_comps is not None:
            if (init_codebook_comps.shape[0] == feat_dim and
                    init_codebook_comps.shape[1] == self.sparse_dim_rat *
                feat_dim):
                self.codebook_comps = init_codebook_comps.copy()
            else:
                raise Exception('Wrong input of initial B matrix, the dimensions' +
                                ' should be ' + str(feat_dim) + 'x' +
                                str(self.sparse_dim) + ', not ' +
                                str(init_codebook_comps.shape[0]) + 'x' +
                                str(init_codebook_comps.shape[1]))
        if (self.codebook_comps is None) or self.flush_flag:
            LOG.warning('Non existent codebook, manufacturing a random one')
            self.codebook_comps = random.random((feat_dim, self.sparse_dim))
        if (self.sparse_features is None) or self.flush_flag:
            self.sparse_features = zeros((self.sparse_dim, 1))
        self.theta = zeros(self.sparse_dim)
        self.active_set = zeros((self.sparse_dim), bool)
        self.sparse_features = zeros((self.sparse_dim, 1))
        self.flush_flag = False
        self.is_trained = False

    def object_val_calc(self, codebook_comps, ksi, gamma, theta, vecs):
        '''
        Calculate objective function value
        '''
        _bs_ = np.dot(codebook_comps, vecs)
        square_term = 0.5 * npsum((ksi - _bs_)**2, axis=0)
        res = (square_term + gamma * dot(theta.T, vecs)).ravel()
        return res

    def feature_sign_search_algorithm(self,
                                      inp_features,
                                      acondtol=1e-3,
                                      ret_error=False,
                                      display_error=False,
                                      max_iter=0,
                                      single=False, timed=True,
                                      starting_points=None,
                                      training=False):
        '''
        Returns sparse features representation
        '''
        self.min_coeff_rat = co.CONST['sparse_fss_min_coeff_rat']
        self.min_coeff = max([self.min_coeff,
                              self.min_coeff_rat *
                              np.size(inp_features)])
        if self.inp_feat_list is not None:
            self.inp_feat_list.append(inp_features.ravel())
        else:
            self.inp_feat_list = [inp_features.ravel()]
        self.inp_features = inp_features.copy().reshape((-1,1))
        # Step 1
        btb = dot(self.codebook_comps.T, self.codebook_comps)
        btf = dot(self.codebook_comps.T, self.inp_features)
        if self.rat is not None:
            self.gamma = np.max(np.abs(-2 * btf)) * self.rat

        gamma = self.gamma
        if starting_points is not None:
            self.sparse_features = starting_points.reshape((self.sparse_dim,
                                                            1))
            self.theta = np.sign(self.sparse_features)
            self.active_set[:] = False
            self.active_set[self.sparse_features.ravel()!=0] = True
            step2 = 0
        else:
            step2 = 1
        count = 0
        prev_objval = 0
        if max_iter == 0:
            max_iter = self.max_iter
        else:
            self.max_iter = max_iter
        self.prev_sparse_feats = None
        prev_error = 0
        initial_energy = compute_lineq_error(inp_features, 0, 0)
        interm_error = initial_energy
        SPLOG.info('Initial Signal Energy: ' + str(initial_energy))
        SPLOG.info('Initial nonzero elements number: ' +
                  str(np.sum(inp_features!=0)))
        converged = False
        for count in range(self.max_iter):
            # Step 2    
            if step2:
                zero_coeffs = (self.sparse_features == 0)
                qp_der_outfeati = 2 * \
                    (dot(btb, self.sparse_features)
                     - btf) * zero_coeffs.reshape((-1,1))
                i = argmax(npabs(qp_der_outfeati))
                if (npabs(qp_der_outfeati[i]) > gamma
                    or npsum(self.active_set) < self.min_coeff):
                    self.theta[i] = -sign(qp_der_outfeati[i])
                    self.active_set[i] = True
            # Step 3
            codebook_comps_h = self.codebook_comps[:, self.active_set]
            sparse_feat_h = self.sparse_features[self.active_set].reshape(
                (-1,1))
            theta_h = self.theta[self.active_set].reshape((-1,1))
            _q_ = dot(codebook_comps_h.T, self.inp_features) - gamma * theta_h / 2.0
            codebook_comps_h2 = dot(codebook_comps_h.T, codebook_comps_h)
            rank = matrix_rank(codebook_comps_h2)
            zc_search = True
            if rank == codebook_comps_h2.shape[0]:
                new_sparse_f_h = np.linalg.solve(codebook_comps_h2, _q_)
            else:
                u,s,v = np.linalg.svd(codebook_comps_h2)
                col_space = u[:, :rank]
                null_space = u[:, rank:]
                #Check if q belongs in column space, ie the projection of
                #q in the column space is q itself
                q_proj = np.zeros_like(_q_).reshape(-1, 1)
                for i in range(col_space.shape[1]):
                    col = col_space[:,i].reshape(-1, 1)
                    q_proj+=((dot(_q_.reshape(1,-1),col) /
                                   np.dot(col.T, col).astype(float))*col)
                '''
                LOG.info('q|Projection: ' +
                         str(np.concatenate((_q_.reshape(-1,1),q_proj),axis=1)))
                LOG.info('Projection Energy: '+ str(np.sum(q_proj**2)))
                LOG.info('Distance between q and projection: '+str(np.linalg.norm(q_proj.ravel()-_q_.ravel())))
                '''
                if np.allclose(q_proj.ravel()-_q_.ravel(), 0, atol=1.e-6):
                    new_sparse_f_h = dot(pinv(codebook_comps_h2),_q_)
                else:
                    #direction z in nullspace of codebook_comps_h2 can not be
                    #perpendicular to _q_, because then _q_ = C(codebook_comps_h2),
                    #which was proven not to hold.
                    #I take the principal vector that belongs in null_space of
                    #codebook_comps_h2 and add it to the current sparse_feat_h
                    #so that to search for zerocrossings
                    #inside the line constructed
                    # by this vector and sparse_feat_h, which has direction,
                    # belonging to null_space of codebook_comps_h2
                    tmp_sparse_f_h = sparse_feat_h + dot(null_space,
                                         np.ones((null_space.shape[1],1)))
                    zero_points_lin_par = sparse_feat_h / (sparse_feat_h
                                                           -
                                                           tmp_sparse_f_h).astype(float)
                    # find _t_ that corresponds to the closest zero crossing to
                    # sparse_feat_h
                    _t_ind = np.argmin(np.abs(zero_points_lin_par[
                        np.isfinite(zero_points_lin_par)]))
                    _t_ = zero_points_lin_par[
                        np.isfinite(zero_points_lin_par)][_t_ind]
                    null_vec = _t_ * tmp_sparse_f_h + (1 - _t_) * sparse_feat_h
                    new_sparse_f_h = null_vec
                    zc_search = False

            # search for zero crossings only if some coefficient changes sign
            if (np.any(sign(sparse_feat_h) != sign(new_sparse_f_h))
                and zc_search):
                zero_points_lin_par = sparse_feat_h / (sparse_feat_h -
                                                       new_sparse_f_h).astype(float)
                zero_points_lin_par = concatenate((zero_points_lin_par[
                    ((zero_points_lin_par > 0) *
                     (zero_points_lin_par < 1)).astype(bool)][:], array([1])), axis=0)
                _t_ = zero_points_lin_par
                null_vecs = _t_ * new_sparse_f_h + (1 - _t_) * sparse_feat_h
                objvals = self.object_val_calc(codebook_comps_h, self.inp_features, gamma,
                                               theta_h,
                                               null_vecs).flatten()
                objval_argmin = argmin(objvals)
                objval = np.min(objvals)
                new_sparse_f_h = null_vecs[:, objval_argmin][:, None].copy()
            else:
                objval = self.object_val_calc(codebook_comps_h, self.inp_features, gamma, theta_h,
                                              new_sparse_f_h)
            self.sparse_features[self.active_set] = new_sparse_f_h.copy()
            self.active_set[self.active_set] = np.logical_not(
                isclose(new_sparse_f_h, 0)).ravel()
            if npsum(self.active_set) < self.min_coeff:
                step2 = 1
                continue
            self.theta = sign(self.sparse_features)
            # Step 4
            nnz_coeff = self.sparse_features != 0
            # a

            new_qp_der_outfeati = 2 * (dot(btb, self.sparse_features) - btf)
            cond_a = (new_qp_der_outfeati +
                      gamma * sign(self.sparse_features)) * nnz_coeff
            '''
            if np.abs(objval) - np.abs(prev_objval) > 100 and not\
                    self.allow_big_vals and not count == 0:
                if self.prev_sparse_feats is not None:
                    SPLOG.info('Current Objective Function value: ' +
                              str(np.abs(objval)))
                    SPLOG.info('Previous Objective Function value: ' +
                              str(np.abs(prev_objval)))
                    SPLOG.info('Problem with big values of inv(B^T*B)' +
                              ',you might want to increase atol' +
                              ' or set flag allow_big_vals to true' +
                              ' (this might cause' +
                              ' problems)')
                    SPLOG.info('Reverting to previous iteration result ' +
                              'and exiting loop..')
                    self.sparse_features = self.prev_sparse_feats.ravel()
                    break
                else:
                    LOG.error('Current Objective Function value: ' +
                              str(np.abs(objval)))
                    LOG.error('Previous Objective Function value: ' +
                              str(np.abs(prev_objval)))
                    LOG.error('Problem with big values of inv(B^T*B),increase atol' +
                              ' or set flag allow_big_vals to true (this might cause' +
                              ' serious convergence problems)')
                    LOG.error('Exiting as algorithm has not produced any'
                              + ' output results.')
                    exit()
            '''
            prev_objval = objval
            self.prev_sparse_feats = self.sparse_features
            if allclose(cond_a, 0, atol=acondtol):
                # go to cond b:
                z_coeff = self.sparse_features == 0
                cond_b = npabs(new_qp_der_outfeati * z_coeff) <= gamma
                if npsum(cond_b) == new_qp_der_outfeati.shape[0]:
                    self.sparse_features = self.sparse_features.reshape((-1,1))
                    converged = True
                    break
                else:
                    # go to step 2
                    step2 = 1
            else:
                # go to step 3
                step2 = 0
            if count % 10 == 0:
                interm_error = compute_lineq_error(
                    self.inp_features, self.codebook_comps,
                    self.sparse_features)
                if interm_error == prev_error or interm_error > initial_energy:
                    converged=True
                    break
                else:
                    prev_error = interm_error
                SPLOG.info('\t Epoch:' + str(count))
                SPLOG.info('\t\t Intermediate Error=' +
                          str(interm_error))
                if interm_error < 0.001:
                    converged=True
                    SPLOG.info('Small error, assuming convergence')
                    break
        '''
        if initial_energy < interm_error:
            if not training:
                LOG.warning('FSS Algorithm did not converge, using pseudoinverse' +
                            ' of provided codebook instead')
                if self.inv_codebook_comps is None:
                    self.inv_codebook_comps = pinv(self.codebook_comps)
                self.sparse_features=dot(self.inv_codebook_comps,self.inp_features).ravel()
            else:
                SPLOG.info('FSS Algorithm did not converge,' +
                            ' removing sample from training dataset...')
                self.sparse_features = None
            return (interm_error), False, initial_energy
        else:
        '''
        if not converged:
            SPLOG.info('FSS Algorithm did not converge' +
                  ' in the given iterations')
        else:
            SPLOG.info('Successful Convergence')
        SPLOG.info('\tFinal error: ' + str(interm_error))
        SPLOG.info('\tNumber of nonzero elements: ' +
                  str(np.sum(self.sparse_features!=0)))
        if not single:
            if self.sparse_feat_list is None:
                self.sparse_feat_list = [self.sparse_features.ravel()]
            else:
                self.sparse_feat_list.append(self.sparse_features.ravel())
        if ret_error:
            return (compute_lineq_error(self.inp_features, self.codebook_comps,
                                        self.sparse_features),
                    True, initial_energy)
        self.sparse_features = self.sparse_features.ravel()
        return None, True, None
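# A minimal standalone sketch (illustrative names, not part of the class) of
# the Step-3 analytic solve performed above: with the signs theta fixed, the
# active-set subproblem min_s ||x - B s||^2 + gamma * theta^T s has the
# closed-form solution s = (B^T B)^-1 (B^T x - gamma * theta / 2), provided
# B^T B is full rank.
import numpy as np

def fss_active_set_step(B_h, x, theta_h, gamma):
    q = B_h.T.dot(x) - gamma * theta_h / 2.0
    return np.linalg.solve(B_h.T.dot(B_h), q)

rng = np.random.RandomState(0)
B_h, x = rng.randn(5, 3), rng.randn(5)   # 3 active atoms, 5-dim signal
theta_h = np.sign(rng.randn(3))
s_new = fss_active_set_step(B_h, x, theta_h, gamma=0.1)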

    def lagrange_dual(self, lbds, ksi, _s_, basis_constraint):
        '''
        Lagrange dual function for the minimization problem
        <ksi> is input, <_s_> is sparse,
        '''
        lbds[lbds==0] = 10**(-18)  # keep the diagonal term invertible (the drawback of this method)
        self.ksist = dot(ksi, _s_.T)
        interm_result = inv(
            dot(_s_, _s_.T) + diag(lbds.ravel()))
        LOG.debug('Computed Lagrange Coefficients:\n'+str(np.unique(lbds)))
        res = ((dot(ksi.T,ksi)).trace() -
               (dot(dot(self.ksist, interm_result), self.ksist.T)).trace() -
               (basis_constraint * diag(lbds.ravel())).trace())
        return -res # minimizing negative = maximizing positive

    def lagrange_dual_grad(self, lbds, ksi, _s_, basis_constraint):
        '''
        Gradient of the lagrange dual function w.r.t. <lbds>
        '''
        # lbds=lbds.flatten()
        interm_result = inv(
            dot(_s_, _s_.T) + diag(lbds.ravel()))
        interm_result = dot(self.ksist, interm_result)
        interm_result = dot(interm_result.T,interm_result)
        res = diag(interm_result) - basis_constraint
        return -res # minimizing negative = maximizing positive

    def lagrange_dual_hess(self, lbds, ksi, _s_, basis_constraint):
        '''
        It is not used, but it is here in case numpy solver gets also
        the hessian as input
        '''
        interm_result = inv(
            dot(_s_, _s_.T) + diag(lbds.ravel()))
        interm_result1 = dot(self.ksist, interm_result)
        res = -2 * dot(interm_result1.T, interm_result1) * interm_result
        return -res #minimizing negative = maximizing positive
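# A quick standalone sanity check (illustrative, with dummy data) that the
# gradient above matches finite differences of the dual. The two helpers
# restate the formulas of lagrange_dual and lagrange_dual_grad without the
# class state (self.ksist is recomputed inline).
import numpy as np

def dual(lbds, ksi, s, c):
    ksist = ksi.dot(s.T)
    inv_ = np.linalg.inv(s.dot(s.T) + np.diag(lbds))
    return -(np.trace(ksi.T.dot(ksi))
             - np.trace(ksist.dot(inv_).dot(ksist.T))
             - c * np.sum(lbds))

def dual_grad(lbds, ksi, s, c):
    inv_ = np.linalg.inv(s.dot(s.T) + np.diag(lbds))
    interm = ksi.dot(s.T).dot(inv_)
    return -(np.diag(interm.T.dot(interm)) - c)

rng = np.random.RandomState(1)
ksi, s = rng.randn(6, 20), rng.randn(4, 20)
lbds, c, eps = np.ones(4), 1.0, 1e-5
num = np.array([(dual(lbds + eps * np.eye(4)[i], ksi, s, c) -
                 dual(lbds - eps * np.eye(4)[i], ksi, s, c)) / (2 * eps)
                for i in range(4)])
assert np.allclose(num, dual_grad(lbds, ksi, s, c), rtol=1e-3, atol=1e-4)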
    # pylint: disable=no-member

    def conj_grad_dict_compute(self):
        '''
        Train the n x m codebook matrix by maximizing the Lagrange
        dual with the (truncated Newton) Newton-CG solver
        '''
        options = {'disp': True}
        '''
        if self.res_codebook_comps is None:
            self.res_codebook_comps = self.codebook_comps
        LOG.info(self.res_codebook_comps.shape)
        '''
        res = minimize(self.lagrange_dual,
                       self.lbds.copy(),
                       method='Newton-CG',
                       jac=self.lagrange_dual_grad,
                       #hess=self.lagrange_dual_hess,
                       #bounds=np.array(([(10**(-18), 10**10)] *
                       #                 self.sparse_feat_list.shape[0])),
                       #stepmx=50.0,
                       #maxCGit=20,
                       #maxfun=100,
                       options=options,
                       #fmin=0.1,
                       #ftol=0.1,
                       #xtol=0.001,
                       #rescale=1.5,
                       args=(self.are_sparsecoded_inp.copy(),
                             self.sparse_feat_list.copy(),
                             self.basis_constraint))
        LOG.info(res)
        self.lbds = res.x
        LOG.info(np.unique(self.lbds))
        interm_result = (diag(self.lbds.ravel()) +
                         dot(self.sparse_feat_list,
                             self.sparse_feat_list.T))
        LOG.info(np.linalg.matrix_rank(interm_result))
        codebook_comps = dot(inv(interm_result),
                       self.ksist.T).T
        return codebook_comps
# pylint: enable=no-member
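# With the optimal multipliers lbds returned by the solver above, the codebook
# has the closed form B = ((S S^T + diag(lbds))^-1 S X^T)^T, i.e. a
# regularized least-squares fit of X ~= B S; this is what
# conj_grad_dict_compute returns. Standalone restatement with dummy shapes:
import numpy as np

def codebook_from_lambdas(lbds, X, S):
    # solve (S S^T + Lambda) B^T = S X^T instead of forming the inverse
    return np.linalg.solve(S.dot(S.T) + np.diag(lbds), S.dot(X.T)).T

rng = np.random.RandomState(2)
X, S = rng.randn(6, 30), rng.randn(4, 30)
B = codebook_from_lambdas(np.full(4, 1e-3), X, S)   # shape (6, 4)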



    def train_sparse_dictionary(self, data, sp_opt_max_iter=200,
                                init_traindata_num=200, incr_rate=2,
                                min_iterations=3, init_codebook_comps=None,
                                log_lev=None, n_jobs=4):
        if log_lev is not None:
            LOG.setLevel(log_lev)
        self.codebook_comps = DictionaryLearning(
            n_components=self.sparse_dim_rat * data.shape[1],
            alpha=co.CONST['sparse_alpha'],
            verbose=1, n_jobs=n_jobs).fit(data).components_.T


    @timeit
    def code1(self, data, max_iter=None, errors=False):
        '''
        Sparse codes a single feature
        Requires that the dictionary is already trained
        '''
        if self.codebook is None:
            self.codebook = SparseCoder(self.codebook_comps.T, n_jobs=4)
        return self.codebook.transform(data.reshape(1,-1)).ravel()
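# Illustrative round trip for the two sklearn-based helpers above: learn an
# overcomplete dictionary, sparse-code one sample, and reconstruct it. Note
# that the class stores components_.T in codebook_comps and transposes back
# when building the SparseCoder.
import numpy as np
from sklearn.decomposition import DictionaryLearning, SparseCoder

data = np.random.RandomState(0).randn(100, 8)             # [n_samples, n_features]
comps = DictionaryLearning(n_components=16,
                           alpha=0.1).fit(data).components_   # [16, 8]
coder = SparseCoder(comps)
code = coder.transform(data[0].reshape(1, -1)).ravel()    # sparse code, [16]
reconstruction = code.dot(comps)                          # approximates data[0]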

    def train_sparse_dictionary1(self, data, sp_opt_max_iter=200,
                                init_traindata_num=200, incr_rate=2,
                                min_iterations=3, init_codebook_comps=None,
                                debug=False):
        '''
        <data> is a numpy array, holding all the features(of single kind) that
        are required to train the sparse dictionary, with dimensions
        [n_features, n_samples]. The sparse dictionary is trained with a random
        subset of <data>, which is increasing in each iteration with rate
        <incr_rate> , along with the max iterations <sp_opt_max_iter> of feature
        sign search algorithm. <min_iterations> is the least number of
        iterations of the dictionary training, after total data is processed.
        '''
        self.sparse_dim = min(data.shape) * self.sparse_dim_rat
        self.flush_variables()
        try:
            import progressbar
        except ImportError:
            LOG.warning('Install module progressbar2 to get informed about the'
                        + ' feature sign search algorithm progress')
        self.initialize(data.shape[0], init_codebook_comps=init_codebook_comps)
        iter_count = 0
        retry_count = 0
        LOG.info('Training dictionary: ' + self.name)
        LOG.info('Minimum Epochs number after total data is processed:' + str(min_iterations))
        reached_traindata_num = False
        reached_traindata_count = 0
        computed = data.shape[1] * [None]
        retry = False
        lar_approx = False
        while True:
            LOG.info('Epoch: ' + str(iter_count))
            loaded = False
            self.sparse_feat_list = None
            self.inp_feat_list = None
            if debug and iter_count == 0:
                LOG.warning('Debug is on, loading data from first FSS execution')
                try:
                    with open(self.name+' debug_sparse.pkl','r') as inp:
                        (self.codebook_comps,
                         self.sparse_feat_list,
                         self.are_sparsecoded_inp) = pickle.load(inp)
                        loaded=True
                except (IOError, EOFError):
                    LOG.warning('Not existent '+self.name
                                +' debug_sparse.pkl')
            if not loaded:
                train_num = min(int(init_traindata_num *
                                    (incr_rate) ** iter_count),
                                data.shape[1])
                if train_num == data.shape[1] and not reached_traindata_num:
                    reached_traindata_num = True
                    LOG.info('Total data is processed')
                if reached_traindata_num:
                    reached_traindata_count += 1
                LOG.info('Number of samples used: ' + str(train_num))
                ran = rand.sample(range(data.shape[1]), train_num)
                feat_sign_max_iter = min(1000,
                                         sp_opt_max_iter * incr_rate ** iter_count)
                LOG.info('Feature Sign Search maximum iterations allowed:'
                         + str(feat_sign_max_iter))
                try:
                    format_custom_text = progressbar.FormatCustomText(
                        'Mean Initial Error: %(mean_init_energy).4f, ' +
                        'Mean Final Error: %(mean).4f, Valid Samples Ratio: %(valid).2f',
                        dict(
                            mean_init_energy=0,
                            mean=0,
                            valid=0
                        ),
                    )
                    pbar = progressbar.ProgressBar(
                        max_value=train_num - 1,
                        redirect_stdout=True,
                        widgets=[progressbar.widgets.Percentage(),
                                 progressbar.widgets.Bar(),
                                 format_custom_text])
                    errors=True
                    sum_error = 0
                    sum_energy = 0
                except NameError:
                    # progressbar was not imported above
                    pbar = None
                    errors = False
                are_sparsecoded = []
                if pbar is not None:
                    iterat = pbar(enumerate(ran))
                else:
                    iterat = enumerate(ran)
                for count, sample_count in iterat:
                    fin_error, valid, init_energy = self.feature_sign_search_algorithm(
                        data[:, sample_count],
                        max_iter=feat_sign_max_iter,
                        ret_error=errors,training=True,
                        starting_points=computed[sample_count])
                    are_sparsecoded.append(True)
                    try:
                        if iter_count > 0 and valid:
                            #do not trust first iteration sparse features, before
                            #having trained the codebooks at least once
                            computed[sample_count] = self.sparse_feat_list[-1]
                    except (TypeError,AttributeError):
                        pass
                    if valid and pbar and errors:
                        sum_error += fin_error
                        mean_error = sum_error/float(sum(are_sparsecoded))
                        sum_energy += init_energy
                        mean_init_energy = sum_energy/float(sum(are_sparsecoded))
                    if pbar is not None:
                        format_custom_text.update_mapping(mean_init_energy=
                                                          mean_init_energy,
                                                          mean=mean_error,
                                                          valid=sum(are_sparsecoded)
                                                          /float(len(are_sparsecoded)))
                    self.initialize(data.shape[0])
                self.inp_feat_list = np.transpose(np.array(self.inp_feat_list))
                self.sparse_feat_list = np.array(self.sparse_feat_list).T
                are_sparsecoded = np.array(
                    are_sparsecoded).astype(bool)
                retry = np.sum(are_sparsecoded) < 1 / 3.0 * (are_sparsecoded).size
                self.are_sparsecoded_inp = self.inp_feat_list[:, are_sparsecoded]
                if debug and iter_count==0:
                    LOG.warning('Debug is on, saving debug_sparse.pkl')
                    with open(self.name + ' debug_sparse.pkl','w') as out:
                        pickle.dump((self.codebook_comps,
                                     self.sparse_feat_list,
                                     self.are_sparsecoded_inp), out)
            prev_error = compute_lineq_error(self.are_sparsecoded_inp, self.codebook_comps,
                self.sparse_feat_list)
            if not lar_approx:
                dictionary = self.conj_grad_dict_compute()
                curr_error = compute_lineq_error(
                    self.are_sparsecoded_inp,
                    dictionary,
                    self.sparse_feat_list)
            LOG.info('Reconstruction Error: ' + str(curr_error))
            if loaded:
                mean_init_energy=0
                mean_error = 0
            if curr_error > prev_error or mean_error>1000 or retry or lar_approx:
                if (prev_error > 100 or mean_error>1000
                    or retry or lar_approx):
                    if retry_count == 2 or lar_approx:
                        if iter_count != 0:
                            iter_count = 0
                            lar_approx = True
                            init_traindata_num = data.shape[1]
                            continue
                        LOG.warning('Training has high final error but' +
                                    ' reached maximum retries. No codebook can' +
                                    ' be produced with the fast method using' +
                                    ' the Lagrange Dual, as the input' +
                                    ' sparsecoded data S is ill-conditioned' +
                                    ' (the rank of S*S^T is too low).' +
                                    ' The Least Angle Regression method' +
                                    ' will be used instead.')
                        self.codebook_comps = DictionaryLearning(
                            self.sparse_dim,
                            fit_algorithm='lars',
                            code_init=self.inp_feat_list.T).fit(
                                self.are_sparsecoded_inp.T).components_.T
                        curr_error = compute_lineq_error(
                                       self.are_sparsecoded_inp,
                                       self.codebook_comps,
                                       self.sparse_feat_list)
                        LOG.info('Reconstruction Error using LARS: '
                                 + str(curr_error))
                        if curr_error > 1000:
                            LOG.info('LARS method did not converge,' +
                                     ' no codebook is produced.')
                            self.is_trained = False
                            self.codebook_comps = None
                        else:
                            break
                    LOG.warning('Training of codebook ' + self.name + ' completed with no success,'+
                                ' reinitializing (Retry:' + str(retry_count + 1) + ')')
                    self.flush_variables()
                    self.initialize(data.shape[0])
                    computed = data.shape[1] * [None]
                    retry_count += 1
                    iter_count = -1
                    reached_traindata_count = 0
                    reached_traindata_num = False
                elif (np.isclose(prev_error,curr_error,atol=0.1)
                      and reached_traindata_num and
                      reached_traindata_count > min_iterations):
                    break
            if curr_error < 0.5 and reached_traindata_num:
                break
            if (reached_traindata_num and
                reached_traindata_count > min_iterations and
                iter_count >= 0):
                    break
            iter_count += 1
            self.codebook_comps = dictionary
        self.inp_feat_list = None
        self.sparse_feat_list = None
        self.is_trained = True
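# The schedule described in the docstring of train_sparse_dictionary1, shown
# standalone: both the training subset size and the FSS iteration budget grow
# geometrically with the epoch until the whole dataset is processed.
n_samples, init_traindata_num, incr_rate, sp_opt_max_iter = 1000, 200, 2, 50
for epoch in range(5):
    train_num = min(int(init_traindata_num * incr_rate ** epoch), n_samples)
    fss_max_iter = min(1000, sp_opt_max_iter * incr_rate ** epoch)
    print('epoch %d: %d samples, FSS max_iter %d' % (epoch, train_num, fss_max_iter))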

    @timeit
    def code(self, data, max_iter=None, errors=False):
        '''
        Sparse codes a single feature
        Requires that the dictionary is already trained
        '''
        if max_iter is None:
            max_iter = co.CONST['sparse_fss_max_iter']
        self.initialize(data.size)
        self.feature_sign_search_algorithm(data.ravel(), max_iter=max_iter,
                                           single=True, display_error=errors,
                                           ret_error=errors)
        return self.sparse_features


    def multicode(self, data, max_iter=None, errors=False):
        '''
        Convenience method for sparsecoding multiple features.
        <data> is assumed to have dimensions [n_features, n_samples]
        output has dimensions [n_sparse, n_samples]
        '''
        feat_dim = 0
        for datum in data:
            if datum is not None:
                feat_dim = len(datum)
        if feat_dim == 0:
            raise Exception('Bad input: no valid (non-None) samples found')
        sparse_features = np.zeros((len(data),
                                    self.sparse_dim_rat* feat_dim))
        for count in range(len(data)):
            if data[count] is not None and np.all(np.isfinite(data[count])):
                sparse_features[count, :] = self.code(data[count],
                                                      max_iter, errors).ravel()
            else:
                sparse_features[count, :] = np.nan
        return sparse_features
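# Usage sketch for multicode (assumes a trained instance `sc` of the class
# above): rows whose input is None or non-finite come back as all-NaN code
# rows, so they can be masked out afterwards, e.g.:
import numpy as np
codes = np.array([[0.5, 0.0, 1.2],
                  [np.nan, np.nan, np.nan]])    # e.g. output of sc.multicode(...)
valid = np.isfinite(codes).all(axis=1)          # -> array([ True, False])
clean_codes = codes[valid]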
# (title, transform_algorithm, transform_alpha, transform_n_nonzero_coefs, color)
estimators = [('OMP', 'omp', None, 15, 'navy'),
              ('Lasso', 'lasso_cd', 2, None, 'turquoise'), ]
lw = 2

plt.figure(figsize=(13, 6))
for subplot, (D, title) in enumerate(zip((D_fixed, D_multi),
                                         ('fixed width', 'multiple widths'))):
    plt.subplot(1, 2, subplot + 1)
    plt.title('Sparse coding against %s dictionary' % title)
    plt.plot(y, lw=lw, linestyle='--', label='Original signal')
    # Do a wavelet approximation
    for title, algo, alpha, n_nonzero, color in estimators:
        coder = SparseCoder(dictionary=D, transform_n_nonzero_coefs=n_nonzero,
                            transform_alpha=alpha, transform_algorithm=algo)
        x = coder.transform(y.reshape(1, -1))
        density = len(np.flatnonzero(x))
        x = np.ravel(np.dot(x, D))
        squared_error = np.sum((y - x) ** 2)
        plt.plot(x, color=color, lw=lw,
                 label='%s: %s nonzero coefs,\n%.2f error'
                 % (title, density, squared_error))

    # Soft thresholding debiasing
    coder = SparseCoder(dictionary=D, transform_algorithm='threshold',
                        transform_alpha=20)
    x = coder.transform(y.reshape(1, -1))
    _, idx = np.where(x != 0)
    x[0, idx], _, _, _ = np.linalg.lstsq(D[idx, :].T, y, rcond=None)
    x = np.ravel(np.dot(x, D))
    squared_error = np.sum((y - x) ** 2)
    plt.plot(x, color='darkorange', lw=lw,
             label='Thresholding w/ debiasing:\n%d nonzero coefs, %.2f error'
             % (len(idx), squared_error))
    plt.axis('tight')
    plt.legend(shadow=False, loc='best')
plt.subplots_adjust(.04, .07, .97, .90, .09, .23)
plt.show()
Example n. 44
def getSparseCodes(dataset,Dict):
    print Dict.shape
    print dataset.shape
    coder=SparseCoder(Dict,transform_algorithm='lasso_lars')
    return coder.transform(dataset)
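# Illustrative call with a trivial orthonormal dictionary (rows are unit-norm
# atoms, as SparseCoder expects); codes.dot(Dict) approximately reconstructs
# the input, up to the l1 penalty of lasso_lars:
import numpy as np
Dict = np.eye(8)
dataset = np.random.RandomState(0).randn(5, 8)
codes = getSparseCodes(dataset, Dict)
reconstruction = codes.dot(Dict)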
Example n. 45
class TIMITSparseGenerator(Dataset):
    """
    Frame-based TIMIT dataset
    """

    _default_seed = (17, 2, 946)

    # Mean and standard deviation of the acoustic samples from the whole
    # dataset (train, valid, test).
    _mean = 0.0035805809921434142
    _std = 542.48824133746177

    def __init__(
        self,
        which_set,
        frame_length,
        overlap=0.5,
        frames_per_example=1,
        start=0,
        stop=None,
        audio_only=True,
        n_prev_phones=0,
        n_next_phones=0,
        samples_to_predict=1,
        filter_fn=None,
        rng=_default_seed,
        b=1.019,
        step=64,
        n_channels=64,
    ):
        """
        Parameters
        ----------
        which_set : str
            Either "train", "valid" or "test"
        frame_length : int
            Number of acoustic samples contained in a frame
        overlap : int or float, optional
            Number of overlapping acoustic samples for two consecutive
            frames; values below 1 are interpreted as a fraction of
            `frame_length`. Defaults to 0.5.
        frames_per_example : int, optional
            Number of frames in a training example. Defaults to 1.
        start : int, optional
            Starting index of the sequences to use. Defaults to 0.
        stop : int, optional
            Ending index of the sequences to use. Defaults to `None`, meaning
            sequences are selected all the way to the end of the array.
        audio_only : bool, optional
            Whether to load only the raw audio and no auxiliary information.
            Defaults to `True`.
        rng : object, optional
            A random number generator used for picking random indices into the
            design matrix when choosing minibatches.
        """
        self.frame_length = frame_length
        if overlap < 1.0:
            self.overlap = overlap * frame_length
        else:
            self.overlap = overlap
        self.frames_per_example = frames_per_example
        self.offset = self.frame_length - self.overlap
        self.audio_only = audio_only
        self.n_prev_phones = n_prev_phones
        self.n_next_phones = n_next_phones
        self.samples_to_predict = samples_to_predict
        self.b = b
        self.step = step
        self.n_channels = n_channels

        print "Frame length %d, overlap %d" % (self.frame_length, self.overlap)

        # Initializing the dictionary
        self.D = numpy.r_[
            tuple(
                gammatone_matrix(self.b, fc, self.frame_length, self.step)
                for fc in erb_space(150, 8000, self.n_channels)
            )
        ]
        print "Using dictionary with shape", self.D.shape

        self.coder = SparseCoder(
            dictionary=self.D, transform_n_nonzero_coefs=None, transform_alpha=None, transform_algorithm="omp"
        )

        # RNG initialization
        if hasattr(rng, "random_integers"):
            self.rng = rng
        else:
            self.rng = numpy.random.RandomState(rng)

        # Load data from disk
        self._load_data(which_set)

        examples_per_sequence = [0]

        for sequence_id, samples_sequence in enumerate(self.raw_wav):
            print "Sentence %d/%d" % (sequence_id, len(self.raw_wav))
            X = segment_axis(samples_sequence, frame_length, overlap, end="pad")
            X = numpy.hanning(self.frame_length) * X
            self.raw_wav[sequence_id] = scipy.sparse.csr_matrix(self.coder.transform(X))
            # TODO: change me
            # Generate features/targets/phones/phonemes/words map
            num_frames = self.raw_wav[sequence_id].shape[0]
            num_examples = num_frames - self.frames_per_example
            examples_per_sequence.append(num_examples)

        self.cumulative_example_indexes = numpy.cumsum(examples_per_sequence)
        self.samples_sequences = self.raw_wav
        numpy.save("%s_sparse_frames.npy" % which_set, self.samples_sequences)
        self.num_examples = self.cumulative_example_indexes[-1]

        # DataSpecs
        features_space = VectorSpace(dim=self.D.shape[0] * self.frames_per_example)
        features_source = "features"

        def features_map_fn(indexes):
            rval = []
            for sequence_index, example_index in self._fetch_index(indexes):
                rval.append(
                    self.samples_sequences[sequence_index][
                        example_index : example_index + self.frames_per_example
                    ].todense()
                )
            return rval

        targets_space = VectorSpace(dim=self.D.shape[0])
        targets_source = "targets"

        def targets_map_fn(indexes):
            rval = []
            for sequence_index, example_index in self._fetch_index(indexes):
                rval.append(self.samples_sequences[sequence_index][example_index + self.frames_per_example].todense())
            return rval

        space_components = [features_space, targets_space]
        source_components = [features_source, targets_source]
        map_fn_components = [features_map_fn, targets_map_fn]
        batch_components = [None, None]

        space = CompositeSpace(space_components)
        source = tuple(source_components)
        self.data_specs = (space, source)
        self.map_functions = tuple(map_fn_components)
        self.batch_buffers = batch_components

        # Defaults for iterators
        self._iter_mode = resolve_iterator_class("shuffled_sequential")
        self._iter_data_specs = (CompositeSpace((features_space, targets_space)), (features_source, targets_source))

    def _fetch_index(self, indexes):
        digit = numpy.digitize(indexes, self.cumulative_example_indexes) - 1
        return zip(digit, numpy.array(indexes) - self.cumulative_example_indexes[digit])

    def _load_data(self, which_set):
        """
        Load the TIMIT data from disk.

        Parameters
        ----------
        which_set : str
            Subset of the dataset to use (either "train", "valid" or "test")
        """
        # Check which_set
        if which_set not in ["train", "valid", "test"]:
            raise ValueError(
                which_set + " is not a recognized value. " + "Valid values are ['train', 'valid', 'test']."
            )

        # Create file paths
        timit_base_path = os.path.join(os.environ["PYLEARN2_DATA_PATH"], "timit/readable")
        raw_wav_path = os.path.join(timit_base_path, which_set + "_x_raw.npy")
        # Load data. For now most of it is not used, as only the acoustic
        # samples are provided, but this is bound to change eventually.
        # Set-related data
        self.raw_wav = serial.load(raw_wav_path)
        # self.scaler = serial.load(scaler_path)

    def _validate_source(self, source):
        """
        Verify that all sources in the source tuple are provided by the
        dataset. Raise an error if some requested source is not available.

        Parameters
        ----------
        source : `tuple` of `str`
            Requested sources
        """
        for s in source:
            try:
                self.data_specs[1].index(s)
            except ValueError:
                raise ValueError("the requested source named '" + s + "' " + "is not provided by the dataset")

    def get_data_specs(self):
        """
        Returns the data_specs specifying how the data is internally stored.

        This is the format the data returned by `self.get_data()` will be.

        .. note::

            Once again, this is very hacky, as the data is not stored that way
            internally. However, the data that's returned by `TIMIT.get()`
            _does_ respect those data specs.
        """
        return self.data_specs

    def get(self, source, indexes):
        """
        .. todo::

            WRITEME
        """
        if type(indexes) is slice:
            indexes = numpy.arange(indexes.start, indexes.stop)
        self._validate_source(source)
        rval = []
        for so in source:
            batch = self.map_functions[self.data_specs[1].index(so)](indexes)
            batch_buffer = self.batch_buffers[self.data_specs[1].index(so)]
            dim = self.data_specs[0].components[self.data_specs[1].index(so)].dim
            if batch_buffer is None or batch_buffer.shape != (len(batch), dim):
                batch_buffer = numpy.zeros((len(batch), dim), dtype=batch[0].dtype)
            for i, example in enumerate(batch):
                batch_buffer[i] = example
            rval.append(batch_buffer)
        return tuple(rval)

    @functools.wraps(Dataset.iterator)
    def iterator(self, mode=None, batch_size=None, num_batches=None, rng=None, data_specs=None, return_tuple=False):
        """
        .. todo::

            WRITEME
        """
        if data_specs is None:
            data_specs = self._iter_data_specs

        # If there is a view_converter, we have to use it to convert
        # the stored data for "features" into one that the iterator
        # can return.
        space, source = data_specs
        if isinstance(space, CompositeSpace):
            sub_spaces = space.components
            sub_sources = source
        else:
            sub_spaces = (space,)
            sub_sources = (source,)

        convert = []
        for sp, src in safe_zip(sub_spaces, sub_sources):
            convert.append(None)

        # TODO: Refactor
        if mode is None:
            if hasattr(self, "_iter_subset_class"):
                mode = self._iter_subset_class
            else:
                raise ValueError("iteration mode not provided and no default " "mode set for %s" % str(self))
        else:
            mode = resolve_iterator_class(mode)

        if batch_size is None:
            batch_size = getattr(self, "_iter_batch_size", None)
        if num_batches is None:
            num_batches = getattr(self, "_iter_num_batches", None)
        if rng is None and mode.stochastic:
            rng = self.rng
        return FiniteDatasetIterator(
            self,
            mode(self.num_examples, batch_size, num_batches, rng),
            data_specs=data_specs,
            return_tuple=return_tuple,
            convert=convert,
        )
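# Standalone sketch of the frame coding done in TIMITSparseGenerator.__init__:
# the real code builds the dictionary from gammatone filters via the
# gammatone_matrix / erb_space helpers of this codebase; a random unit-norm
# dictionary stands in here, so only the windowing and OMP steps are shown.
import numpy
from sklearn.decomposition import SparseCoder

frame_length, n_atoms = 160, 512
rng = numpy.random.RandomState(0)
D = rng.randn(n_atoms, frame_length)
D /= numpy.sqrt((D ** 2).sum(axis=1))[:, None]      # unit-norm atoms
frame = numpy.hanning(frame_length) * rng.randn(frame_length)
coder = SparseCoder(dictionary=D, transform_algorithm="omp")
sparse_frame = coder.transform(frame.reshape(1, -1))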
class SMHClassifier(BaseEstimator):
    """
    SMH-based classifier.
    """
    def __init__(self, tuple_size=3, n_tuples=692,
                 wcc=None, ovr_thres=0.7):
        self.tuple_size = tuple_size

        if wcc:
            self.wcc = wcc
            self.n_tuples = log(0.5) / log(1.0 - pow(wcc, tuple_size))
        else:
            self.n_tuples = n_tuples

    def discover_topics(self, X, tuple_size=3, n_tuples=692,
                        weights=True, expand=True,
                        thres=0.7, cutoff=3):
        """
        Discovers topics from a text corpus.
        """
        ifs = array_to_listdb(X)
        mined = ifs.mine(tuple_size=tuple_size,
                         num_tuples=n_tuples,
                         weights=weights,
                         expand=expand)
        mined.cutoff(min=cutoff)
        models = mined.cluster_mhlink(thres=thres)
    
        return models

    def fit(self, X, tuple_size=3, n_tuples=692,
            weights=True, expand=True,
            thres=0.7, cutoff=3):
        """
        Discovers topics and used them as a dictionary for sparse-coding.
        """
        models = self.discover_topics(X,
                                      tuple_size=tuple_size,
                                      n_tuples=n_tuples,
                                      weights=weights,
                                      expand=expand,
                                      thres=thres,
                                      cutoff=cutoff)
        self.coder = SparseCoder(dictionary=normalize(models.toarray()),
                                 transform_algorithm='lasso_lars',
                                 split_sign=True,
                                 n_jobs=4)
        
    def fit_transform(self, X, tuple_size=3, n_tuples=692,
                      weights=True, expand=True,
                      thres=0.7, cutoff=3):
        """
        Discovers topics and used them as a dictionary to sparse-code
        the documents.
        """
        self.fit(X, tuple_size=tuple_size, n_tuples=n_tuples, weights=weights, expand=expand,
                 thres=thres, cutoff=cutoff)
        return self.coder.fit_transform(X.todense())

    
    def transform(self, X):
        """
        Sparse-code a given set of documents from the
        discovered topics.
        """
        return self.coder.transform(X.todense())
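# Hedged usage sketch for SMHClassifier (the sampled min-hashing helpers such
# as array_to_listdb come from the SMH library and are assumed importable;
# X is a scipy.sparse document-term matrix):
# clf = SMHClassifier(tuple_size=3, n_tuples=692)
# topic_codes = clf.fit_transform(X)    # split_sign=True doubles the code width
# new_codes = clf.transform(X_new)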