Code example #1
    def load_eigenfunctions(self,m,which_lap='sym',D=None,remove_first_eig=False):
        """ Extract ``m`` eigenvectors and eigenvalues of the laplacian, in non-decreasing order.
        
            Args:
                m (int) : number of eigenvectors to extract
                which_lap (str) : Chooses the type of laplacian. One of ``sym``, ``comb`` or ``rw``.
                D (`NDArray[float].shape[N,N]`) : Optional. Extra matrix for a generalized eigenvalue problem.
                remove_first_eig (bool) : Optional. If ``True``, discards the first (trivial) eigenpair.
            
            Returns:
                Pair[NDArray[float].shape[N,M], NDArray[float].shape[M]] : matrix of eigenvectors (as columns), and vector of eigenvalues
                
        """
        if self.cache_dir is not None:
            eigvec_path = osp.join(self.cache_dir,f'eigvec_{which_lap}.npy')
            eigval_path = osp.join(self.cache_dir,f'eigval_{which_lap}.npy')
            files_exist = (osp.isfile(eigvec_path)) and (osp.isfile(eigval_path))
            if files_exist:
                LOG.info(f"Loading eigenfunctions in {eigvec_path} ...")
                EIGVAL = np.load(eigval_path)
                EigVec = np.load(eigvec_path)
                if EIGVAL.shape[0] >= m:
                    return EigVec[:,:m], EIGVAL[:m]

        
        L = lap_matrix(self,which_lap)
        print(f"Extracting {m} eigenvectors for matrix L (shape: {L.shape}, #edges= {L.data.shape}")
        m = min(m,L.shape[0]-1)
        eigVec, eigVal = extract_lap_eigvec(L,m,D,remove_first_eig)
        if self.cache_dir is not None:
            LOG.info(f"Saving eigenfunctions to {eigvec_path} ...")
            np.save(eigvec_path,eigVec)
            np.save(eigval_path,eigVal)
        return eigVec, eigVal
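
The helper extract_lap_eigvec is not shown above; as a rough, hedged sketch of what such a routine might compute (assuming a symmetric PSD sparse Laplacian and using scipy.sparse.linalg.eigsh; the real implementation may also handle the generalized problem with D):

import numpy as np
import scipy.sparse
import scipy.sparse.linalg

def extract_smallest_eigpairs(L, m, remove_first_eig=False):
    """Smallest-m eigenpairs of a symmetric PSD Laplacian, ascending order (sketch)."""
    k = m + 1 if remove_first_eig else m
    # 'SM' asks for the smallest-magnitude eigenvalues; shift-invert is faster
    # but needs a factorization of the (near-singular) Laplacian.
    vals, vecs = scipy.sparse.linalg.eigsh(L, k=k, which='SM')
    order = np.argsort(vals)
    vals, vecs = vals[order], vecs[:, order]
    if remove_first_eig:
        vals, vecs = vals[1:], vecs[:, 1:]
    return vecs, vals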
Code example #2
    def run_all(self):

        CSV_PATH = os.path.join(CSV_FOLDER, self.get_spec_name() + '.csv')
        JOINED_CSV_PATH = os.path.join(CSV_FOLDER,
                                       self.get_spec_name() + '_joined.csv')

        cfgs = self.get_all_configs()
        cfgs_keys = set()
        for x in cfgs:
            cfgs_keys.update(x.keys())

        #List of produced output dicts
        output_dicts = list()

        cfgs_size = len(cfgs)

        has_written_already = False

        bar = progressbar.ProgressBar(maxval=cfgs_size)
        counter = 0
        bar.start()
        bar.update(0)

        for i in range(cfgs_size):
            print("PROGRESS: {}".format(i / cfgs_size))
            #Maybe suppress output
            nullwrite = open(os.devnull, 'w')
            oldstdout = sys.stdout
            if not self.DEBUG_MODE:
                sys.stdout = nullwrite

            try:
                output_dicts.append(self.run(cfgs[i]))
            finally:
                #Restore stdout and close the devnull handle even if run() raises
                sys.stdout = oldstdout
                nullwrite.close()
            #Append to csv if conditions are met
            if i == cfgs_size - 1 or i % self.WRITE_FREQ == 0:
                LOG.info("appending csv...", LOG.ll.SPECIFICATION)
                csv_exists = os.path.isfile(CSV_PATH)
                if self.OVERWRITE:
                    if csv_exists and has_written_already:
                        f_mode = 'a'
                    else:
                        f_mode = 'w'
                else:
                    if csv_exists:
                        f_mode = 'a'
                    else:
                        f_mode = 'w'
                LOG.debug("f_mode={}".format(f_mode), LOG.ll.SPECIFICATION)
                self._append_to_csv(output_dicts, CSV_PATH, f_mode, cfgs_keys)
                has_written_already = True
                output_dicts.clear()

            bar.update(i + 1)
        LOG.info(f"CSV saved at f{CSV_PATH}", LOG.ll.SPECIFICATION)
        aggregate_csv([CSV_PATH], JOINED_CSV_PATH)
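
The manual save/restore of sys.stdout above can also be written with the standard library's contextlib.redirect_stdout; a minimal sketch (run_quietly and debug_mode are hypothetical names mirroring self.run / self.DEBUG_MODE):

import contextlib
import os

def run_quietly(fn, *args, debug_mode=False, **kwargs):
    """Call fn, discarding anything it prints unless debug_mode is True."""
    if debug_mode:
        return fn(*args, **kwargs)
    with open(os.devnull, 'w') as nullwrite:
        with contextlib.redirect_stdout(nullwrite):
            return fn(*args, **kwargs)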
Code example #3
    def handle_adaptive_sigma(self,K):
        if not scipy.sparse.issparse(K):
            M = np.array(K, dtype=float)     # copy, so the caller's K is not modified
            M[M == 0] = np.inf               # ignore zero (self) distances
            M = np.sort(M, axis=1)
            # sigma = mean distance to the 10th nearest neighbour, divided by 3
            self.sigma = np.mean(M[:, 9]) / 3
            LOG.info("Adaptive sigma is {}".format(self.sigma), LOG.ll.MATRIX)
        else:
            self.sigma = np.mean([np.sort(K.getrow(i).data)[9] / 3 for i in range(K.shape[0])])
        return lambda d: np.exp(-(d * d) / (2 * self.sigma * self.sigma))
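
A quick numeric illustration of the heuristic (a sketch assuming K is a dense pairwise-distance matrix with at least 10 neighbours per row; sigma is one third of the mean 10th-nearest-neighbour distance, so that neighbour sits near 3*sigma):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 3))
K = np.linalg.norm(X[:, None, :] - X[None, :, :], axis=-1)  # dense pairwise distances

M = K.copy()
M[M == 0] = np.inf                  # ignore self-distances
M = np.sort(M, axis=1)
sigma = np.mean(M[:, 9]) / 3        # mean 10th-NN distance / 3
dist_func = lambda d: np.exp(-(d * d) / (2 * sigma * sigma))
print(sigma, dist_func(3 * sigma))  # kernel value exp(-4.5) ~= 0.011 at d = 3*sigma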
Code example #4
    def createVideo(self):
        if not self.create_video:
            return

        if self.steps_taken == 0:
            return
        LOG.info("Creating video...", LOG.ll.HOOK)
        video_command = "ffmpeg -r {} -y  -pattern_type glob -i '{}' -c:v libx264 -vf fps=25 -pix_fmt yuv420p '{}'".format(
            self.steps_taken/(15.0*5.0),
            os.path.join(self.filename_dir,self.temp_subfolder_name,"*.png"),
            os.path.join(self.filename_dir,self.video_path)
            )
        LOG.debug(video_command, LOG.ll.HOOK)
        os.system(video_command)
        LOG.info("Created video...", LOG.ll.HOOK)
Code example #5
def _faiss_knn(X, k, mode='mut', inner_prod=False):
    # kNN search for the graph (FAISS expects contiguous float32 input)
    X = np.ascontiguousarray(X, dtype=np.float32)
    print("Number of GPUS detected by FAISS: {}".format(faiss.get_num_gpus()))
    d = X.shape[1]
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.useFloat16 = False
    flat_config.device = 0

    c = time.time()
    if inner_prod:
        # cosine/IP search: L2-normalize first, then use an inner-product index
        faiss.normalize_L2(X)
        index = faiss.GpuIndexFlatIP(res, d, flat_config)
    else:
        index = faiss.GpuIndexFlatL2(res, d, flat_config)   # build the index
    index.add(X)
    elapsed = time.time() - c
    LOG.info(f'kNN Index built in {elapsed:.3f} seconds', LOG.ll.UTILS)
    N = X.shape[0]
    Nidx = index.ntotal

    c = time.time()
    D, I = index.search(X, k + 1)    # k+1 because each point retrieves itself first
    elapsed = time.time() - c
    LOG.info(f'kNN Search done in {elapsed:.3f} seconds', LOG.ll.UTILS)

    # Create the graph. FAISS returns squared L2 distances, hence the sqrt;
    # column 0 is the query point itself and is dropped.
    D = np.sqrt(D[:, 1:])
    I = I[:, 1:]
    row_idx = np.arange(N)
    row_idx_rep = np.tile(row_idx, (k, 1)).T
    W = scipy.sparse.csr_matrix((D.flatten('F'), (row_idx_rep.flatten('F'), I.flatten('F'))), shape=(N, N))

    W = __symmetrize_KNN(W, mode=mode)

    return W
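
GPU FAISS is not always available; a minimal CPU sketch of the same kNN step (faiss.IndexFlatL2 is the CPU counterpart of GpuIndexFlatL2; the simple 'max' symmetrization below stands in for the unseen __symmetrize_KNN helper):

import numpy as np
import faiss
import scipy.sparse

def knn_graph_cpu(X, k):
    """Sparse symmetric kNN distance graph via exact CPU search (sketch)."""
    X = np.ascontiguousarray(X, dtype=np.float32)
    N, d = X.shape
    index = faiss.IndexFlatL2(d)
    index.add(X)
    D, I = index.search(X, k + 1)    # first hit of each query is the point itself
    D = np.sqrt(D[:, 1:])            # FAISS returns squared L2 distances
    I = I[:, 1:]
    rows = np.repeat(np.arange(N), k)
    W = scipy.sparse.csr_matrix((D.ravel(), (rows, I.ravel())), shape=(N, N))
    return W.maximum(W.T)            # 'max' symmetrization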
Code example #6
File: _lgc_tf.py (project: brunoklaus/tf-labelprop-BR)
def LGC_iter_TF(X,W,Y,labeledIndexes, alpha = 0.1,num_iter = 1000, hook=None):
    c = time.time()
    
    """ Set W to sparse if necessary, make copy of Y """
    W = sparse.csr_matrix(W)        
    Y = np.copy(Y)
    
    """ Convert W to tensor """
    W = convert_sparse_matrix_to_sparse_tensor(W)
    LOG.debug(W,LOG.ll.CLASSIFIER)
    
    """ Get degree Matrix """
    D =  tf.sparse.reduce_sum(W,axis=1)
    
    
    """ F_0 is a copy of the label matrix, but we erase the information on labeled Indexes """
    F_0 = np.copy(Y).astype(np.float32) 
    F_0[np.logical_not(labeledIndexes),:] = 0.0
    
    
    
    """
        CREATE S - Needed for LGC propagation
    """
    S =  get_S_fromtensor(W)
    
    
    """
    CREATE F variable
    """
    F = tf.Variable(np.copy(F_0).astype(np.float32),name="F")
    F_0 = tf.Variable(F_0)
    for _ in range(num_iter):
        F = (1-alpha)*F_0 + alpha*tf.sparse.sparse_dense_matmul(S,F)
    
    elapsed = time.time() - c
    LOG.info('Label Prop done in {:.2f} seconds'.format(elapsed),
             LOG.ll.CLASSIFIER)
    
    return F.numpy()
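
The loop is the classic LGC fixed-point iteration F <- (1-alpha)*F_0 + alpha*S@F, which converges to the closed form (1-alpha)*(I - alpha*S)^(-1) F_0; a small self-contained numpy sketch checking that the two agree:

import numpy as np

rng = np.random.default_rng(1)
n, c, alpha = 20, 3, 0.9
W = rng.random((n, n)); W = 0.5 * (W + W.T); np.fill_diagonal(W, 0)
d = W.sum(axis=1)
S = W / np.sqrt(np.outer(d, d))           # D^{-1/2} W D^{-1/2}
F0 = np.eye(c)[rng.integers(0, c, n)]     # random one-hot labels

F = F0.copy()
for _ in range(2000):
    F = (1 - alpha) * F0 + alpha * S @ F  # fixed-point iteration

F_closed = (1 - alpha) * np.linalg.solve(np.eye(n) - alpha * S, F0)
print(np.abs(F - F_closed).max())         # ~0 after convergence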
Code example #7
    def generateAffMat(self,X,Y=None,labeledIndexes=None,hook=None):
        """ Generates the Affinity Matrix.
        
            Returns:
                `tf_labelprop.gssl.graph.gssl_affmat.AffMat`: An affinity matrix
         """
         
        """
             Return Cached matrix, if cache directory exists
        """
        X = X.astype(np.float32)
        
        if AffMat.cache_mat_exists(self.cache_dir):
            LOG.info(f"Loading Affinity Matrix from {self.cache_dir}...",LOG.ll.MATRIX)
            return AffMat(W=None,cache_dir=self.cache_dir)
         
        LOG.info("Creating Affinity Matrix...",LOG.ll.MATRIX)
        
        if hook is not None:
            hook._begin(X=X,Y=Y,labeledIndexes=labeledIndexes,W=None)
        
        K = self.get_or_calc_Mask(X)
        
        if self.sigma == "mean":
            self.dist_func = self.handle_adaptive_sigma(K)
        

        if K.shape[0] != X.shape[0]:
            raise ValueError("Shapes do not match for X,K")
            
        
        W = self.W_from_K(X,K)
        
        if self.row_normalize:
            W = gutils.deg_matrix(W, pwr=-1.0, NA_replace_val=1.0) @ W 
        
        del K
        LOG.info("Creating Affinity Matrix...Done!",LOG.ll.MATRIX)
        assert(W.shape == (X.shape[0],X.shape[0]))
        if np.max(W)==0:
            raise Exception("Affinity matrix cannot have all entries equal to zero.")
        
        if hook is not None:
            hook._end(X=X,Y=Y,W=W)

        return AffMat(W=W.astype(np.float32),cache_dir=self.cache_dir)
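
The row_normalize branch left-multiplies by the inverse degree matrix, turning W into a row-stochastic (random-walk) matrix; a tiny sketch of what gutils.deg_matrix(W, pwr=-1.0, NA_replace_val=1.0) @ W presumably amounts to:

import numpy as np
import scipy.sparse

def row_normalize(W, na_replace_val=1.0):
    """D^{-1} @ W: each row sums to 1; empty rows get na_replace_val instead (sketch)."""
    deg = np.asarray(W.sum(axis=1)).ravel()
    inv = np.full(deg.shape, na_replace_val, dtype=float)
    nz = deg > 0
    inv[nz] = 1.0 / deg[nz]
    return scipy.sparse.diags(inv) @ W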
Code example #8
    def __MR(self, X, W, Y, labeledIndexes, p, optimize_labels, hook=None):
        """
            -------------------------------------------------------------
                INITIALIZATION
            --------------------------------------------------------------
        """

        ORACLE_Y = Y.copy()
        Y = np.copy(Y)
        if Y.ndim == 1:
            Y = gutils.init_matrix(Y, labeledIndexes)
        Y[np.logical_not(labeledIndexes), :] = 0

        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")

        l = np.reshape(np.array(np.where(labeledIndexes)), (-1))
        num_lab = l.shape[0]

        if not isinstance(p, int):
            p = int(p * num_lab)

        if p > Y.shape[0]:
            p = Y.shape[0]
            LOG.warn("Warning: p greater than the number of samples",
                     LOG.ll.CLASSIFIER)
        #W = gutils.scipy_to_np(W)
        #W =  0.5* (W + W.T)
        L = gutils.lap_matrix(W, which_lap='sym')
        D = gutils.deg_matrix(W, flat=True, pwr=-1.0)

        L = 0.5 * (L + L.T)

        def check_symmetric(a, tol=1e-8):
            return np.allclose(a, a.T, atol=tol)

        def is_pos_sdef(x):
            return np.all(np.linalg.eigvals(x) >= -1e-06)

        import scipy.sparse
        sym_err = L - L.T
        sym_check_res = np.all(np.abs(sym_err.data) < 1e-7)  # tune this value
        assert sym_check_res
        """---------------------------------------------------------------------------------------------------
                EIGENFUNCTION EXTRACTION
        ---------------------------------------------------------------------------------------------------
        """
        import time
        start_time = time.time()

        import os.path as osp
        from tf_labelprop.settings import INPUT_FOLDER

        cache_eigvec = osp.join(INPUT_FOLDER, 'eigenVectors.npy')
        cache_eigval = osp.join(INPUT_FOLDER, 'eigenValues.npy')

        if False:  # debug toggle: flip to True to reuse previously cached eigenpairs
            eigenValues, eigenVectors = np.load(cache_eigval), np.load(
                cache_eigvec)
            eigenVectors = eigenVectors[:, :p]
            eigenValues = eigenValues[:p]
        else:

            eigenVectors, eigenValues = W.load_eigenfunctions(p)

            time_elapsed = time.time() - start_time
            LOG.info("Took {} seconds to calculate eigenvectors".format(
                int(time_elapsed)))
            idx = eigenValues.argsort()
            eigenValues = eigenValues[idx]
            LOG.debug(eigenValues)
            assert eigenValues[0] <= eigenValues[eigenValues.shape[0] - 1]
            eigenVectors = eigenVectors[:, idx]
            np.save(cache_eigval, arr=eigenValues)
            np.save(cache_eigvec, arr=eigenVectors)
        U = eigenVectors
        LAMBDA = eigenValues

        U = U[:, np.argsort(LAMBDA)]
        LAMBDA = LAMBDA[np.argsort(LAMBDA)]

        import tensorflow as tf

        gpus = tf.config.experimental.list_physical_devices('GPU')

        #tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024*8)])
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        """
        -------------------------------------------------------------------------
            Define Constants on GPU
        ------------------------------------------------------------------------------
        """
        U, X, Y = [tf.constant(x.astype(np.float32)) for x in [U, X, Y]]

        _U_times_U = tf.multiply(U, U)
        N = X.shape[0]

        def to_sp_diag(x):
            n = tf.cast(x.shape[0], tf.int64)
            indices = tf.concat([
                tf.range(n, dtype=tf.int64)[None, :],
                tf.range(n, dtype=tf.int64)[None, :]
            ],
                                axis=0)
            return tf.sparse.SparseTensor(indices=tf.transpose(indices),
                                          values=x,
                                          dense_shape=[n, n])

        @tf.function
        def smooth_labels(labels, factor=0.001):
            # smooth the labels
            labels = tf.cast(labels, tf.float32)
            labels *= (1 - factor)
            labels += (factor / tf.cast(tf.shape(labels)[0], tf.float32))
            # returned the smoothed labels
            return labels

        @tf.function
        def divide_by_row(x, eps=1e-07):
            x = tf.maximum(x, 0 * x)
            x = x + eps  # [N,C]    [N,1]
            return x / (tf.reduce_sum(x, axis=-1)[:, None])

        def spd_matmul(x, y):
            return tf.sparse.sparse_dense_matmul(x, y)

        def mult_each_row_by(X, by):
            """ Elementwise multiplies each row by a given row vector.
            
                For a 2D tensor, also corresponds to multiplying each column by the respective scalar in the given row vector
                
                Args:
                    X (Tensor)  
                    by (Tensor[shape=(C,)]): row vector
            
            """
            # [N,C] * [1,C]
            return X * by[None, :]

        def mult_each_col_by(X, by):
            # [N,C] * [N,1]
            return X * by[:, None]

        @tf.function
        def accuracy(y_true, y_pred):
            acc = tf.cast(
                tf.equal(tf.argmax(y_true, axis=-1),
                         tf.argmax(y_pred, axis=-1)), tf.float32)
            return tf.reduce_mean(acc)

        """
            -----------------------------------------------------------------------------
            DEFINE VARS
            --------------------------------------------------------------------------------
        """

        MU = tf.Variable(0.1, name="MU")

        LAMBDA = tf.constant(LAMBDA.astype(np.float32), name="LAMBDA")
        PI = tf.Variable(tf.ones(shape=(tf.shape(Y)[0], ), dtype=tf.float32),
                         name="PI")
        _l = LAMBDA.numpy()
        CUTOFF = tf.Variable(0.0, name='CUTOFF')
        CUTOFF_K = tf.Variable(1.0)

        @tf.function
        def get_alpha(MU):
            return tf.pow(2.0, -tf.math.reciprocal(tf.abs(100 * MU)))

        @tf.function
        def to_prob(x):
            return tf.nn.softmax(x, axis=1)

        @tf.function
        def cutoff(x):
            return 1.0 / (1.0 + tf.exp(-CUTOFF_K * (CUTOFF - x)))

        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv1D(8, kernel_size=5, padding='same'))
        model.add(tf.keras.layers.Activation('relu'))
        model.add(tf.keras.layers.Conv1D(8, kernel_size=5, padding='same'))
        model.add(tf.keras.layers.Activation('relu'))
        model.add(tf.keras.layers.Conv1D(1, kernel_size=3, padding='same'))

        model.add(tf.keras.layers.Flatten())
        """
            -----------------------------------------------------------------------------
            DEFINE FORWARD
            --------------------------------------------------------------------------------
        """

        @tf.function
        def forward(Y, U, PI, mode='train', remove_diag=True):
            if mode == 'train':
                U = tf.gather(U, indices=np.where(labeledIndexes)[0], axis=0)
                Y = tf.gather(Y, indices=np.where(labeledIndexes)[0], axis=0)
                #F = tf.gather(F,indices=np.where(labeledIndexes)[0],axis=0)

                PI = tf.gather(PI, indices=np.where(labeledIndexes)[0], axis=0)

            pi_Y = spd_matmul(to_sp_diag(tf.abs(PI)), Y)

            alpha = get_alpha(MU)
            """
                Maybe apply custom convolution to LAMBDA, otherwise just fit LGC's alpha using the corresponding filter 1/(1-alpha + alpha*lambda)
            """
            if not self.custom_conv:
                lambda_tilde = tf.math.reciprocal(1 - alpha + alpha * LAMBDA)
            else:
                #lambda_tilde = tf.math.reciprocal(1-alpha + alpha*LAMBDA)
                _lambda = (LAMBDA -
                           tf.reduce_mean(LAMBDA)) / tf.math.reduce_std(LAMBDA)
                lambda_tilde = tf.clip_by_value(
                    2 * tf.nn.sigmoid(
                        tf.reshape(model(_lambda[None, :, None]), (-1, ))), 0,
                    1)
                lambda_tilde = tf.sort(lambda_tilde, direction='DESCENDING')
            lambda_tilde = tf.reshape(divide_by_row(lambda_tilde[None, :]),
                                      (-1, ))

            _self_infl = mult_each_row_by(
                tf.square(U), by=lambda_tilde
            )  #Square each element of U, then dot product of each row with lambda_tilde
            _self_infl = tf.reduce_sum(_self_infl, axis=1)

            _P_op = U @ (mult_each_col_by(
                (tf.transpose(U) @ pi_Y), by=lambda_tilde))
            if not remove_diag:
                _diag_P_op = tf.zeros_like(
                    mult_each_col_by(pi_Y, by=_self_infl))
            else:
                _diag_P_op = mult_each_col_by(pi_Y, by=_self_infl)
            return divide_by_row(_P_op - _diag_P_op), lambda_tilde, pi_Y

        """
            -----------------------------------------------------------------------------
                DEFINE LOSSES and learning schedule
            --------------------------------------------------------------------------------
        """
        losses = {
            'xent':
            lambda y_, y: tf.reduce_mean(-tf.reduce_sum(y_ * tf.cast(
                tf.math.log(smooth_labels(y, factor=0.01)), tf.float32),
                                                        axis=[1])),
            'sq_loss':
            lambda y_, y: tf.reduce_mean(
                tf.reduce_sum(tf.square(y_ - y), axis=[1])),
            'abs_loss':
            lambda y_, y: tf.reduce_mean(
                tf.reduce_sum(tf.abs(y_ - y), axis=[1])),
            'hinge':
            lambda y_, y: tf.reduce_mean(
                tf.reduce_sum(tf.maximum(1. - y_ * y, tf.zeros_like(y)),
                              axis=1))
        }

        NUM_ITER = 700
        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            0.5, decay_steps=200, decay_rate=0.9, staircase=False)

        opt = tf.keras.optimizers.Adam(0.05)

        Y_l = tf.gather(Y, indices=np.where(labeledIndexes)[0], axis=0)

        #import matplotlib.pyplot as plt
        #import matplotlib
        #matplotlib.use('tkagg')
        import pandas as pd
        """
            -----------------------------------------------------------------------------
            LEARNING
            --------------------------------------------------------------------------------
        """
        L = []
        df = pd.DataFrame()
        max_acc, min_loss = [0, np.inf]
        counter_since_best = 0
        best_trainable_variables = None
        for i in range(NUM_ITER):
            #MU.assign(i)
            with tf.GradientTape() as t:
                # no need to watch a variable:
                # trainable variables are always watched
                pred_L, lambda_tilde, pi_Y = forward(Y, U, PI, mode='train')
                loss_sq = losses['sq_loss'](pred_L, Y_l)
                loss = losses['xent'](pred_L, Y_l)

                loss_xent = losses['xent'](pred_L, Y_l)

            acc = accuracy(Y_l, pred_L)
            _not_lab = np.where(np.logical_not(labeledIndexes))[0]
            acc_true = accuracy(
                tf.gather(ORACLE_Y, indices=_not_lab, axis=0),
                tf.gather(forward(Y, U, PI, mode='eval')[0],
                          indices=_not_lab,
                          axis=0))

            L.append(
                np.array([i, loss_sq, loss, loss_xent, acc,
                          acc_true])[None, :])
            """
                TRAINABLE VARIABLES GO HERE
            """
            if self.custom_conv:
                trainable_variables = model.weights
            else:
                trainable_variables = [MU]
            if optimize_labels:
                trainable_variables.append(PI)

            if acc > max_acc:
                print(acc.numpy())  # new best accuracy
                best_trainable_variables = [
                    k.numpy() for k in trainable_variables
                ]
                max_acc = acc
                min_loss = loss
                counter_since_best = 0
            elif acc <= max_acc:

                counter_since_best += 1
                if counter_since_best > 2000:
                    break
            """
                Apply gradients
            """
            gradients = t.gradient(loss, trainable_variables)
            opt.apply_gradients(zip(gradients, trainable_variables))
            """
                Project labels such that they sum up to the original amount
            """
            pi = PI.numpy()
            pi[labeledIndexes] = np.sum(
                labeledIndexes) * pi[labeledIndexes] / (np.sum(
                    pi[labeledIndexes]))
            PI.assign(pi)

            if i % 10 == 0:
                """ Print info """
                if not hook is None:
                    if self.hook_iter_mode == "labeled":
                        plot_y = np.zeros_like(Y)
                        plot_y[labeledIndexes] = Y_l.numpy()
                    else:
                        plot_y = tf.clip_by_value(
                            forward(Y, U, PI, mode='eval')[0], 0,
                            999999).numpy()
                    hook._step(step=i,
                               X=X,
                               W=W,
                               Y=plot_y,
                               labeledIndexes=labeledIndexes)
                alpha = get_alpha(MU)
                PI_l = tf.gather(PI,
                                 indices=np.where(labeledIndexes)[0],
                                 axis=0)
                LOG.info(
                    f"Acc: {acc.numpy():.3f}; ACC_TRUE:{acc_true.numpy():.3f}  Loss: {loss.numpy():.3f}; alpha = {alpha.numpy():.3f}; PI mean = {tf.reduce_mean(PI_l).numpy():.3f} "
                )

        #plt.scatter(range(lambda_tilde.shape[0]),np.log10(lambda_tilde/LAMBDA),s=2)
        #plt.show()
        if best_trainable_variables is not None:
            for k in range(len(trainable_variables)):
                trainable_variables[k].assign(best_trainable_variables[k])
        return tf.clip_by_value(forward(Y, U, PI, mode='eval')[0], 0,
                                999999).numpy()
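
The filter lambda_tilde = 1/(1 - alpha + alpha*lambda) applied to the Laplacian eigenvalues is the LGC propagation operator in the spectral domain: with L_sym = I - S = U diag(lambda) U^T, one has (I - alpha*S)^(-1) = U diag(1/(1 - alpha + alpha*lambda_i)) U^T. A small numpy sketch verifying the identity:

import numpy as np

rng = np.random.default_rng(2)
n, alpha = 15, 0.8
W = rng.random((n, n)); W = 0.5 * (W + W.T); np.fill_diagonal(W, 0)
d_inv_sqrt = 1.0 / np.sqrt(W.sum(axis=1))
S = d_inv_sqrt[:, None] * W * d_inv_sqrt[None, :]
lam, U = np.linalg.eigh(np.eye(n) - S)            # eigenpairs of L_sym

lhs = np.linalg.inv(np.eye(n) - alpha * S)
rhs = U @ np.diag(1.0 / (1 - alpha + alpha * lam)) @ U.T
print(np.abs(lhs - rhs).max())                    # ~1e-12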
Code example #9
    def __MR(self, X, W, Y, labeledIndexes, p, optimize_labels, hook=None):
        """
            -------------------------------------------------------------
                INITIALIZATION
            --------------------------------------------------------------
        """
        ORACLE_Y = Y.copy()
        Y = np.copy(Y)
        if Y.ndim == 1:
            Y = gutils.init_matrix(Y, labeledIndexes)
        Y[np.logical_not(labeledIndexes), :] = 0

        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")

        l = np.reshape(np.array(np.where(labeledIndexes)), (-1))
        num_lab = l.shape[0]

        if not isinstance(p, int):
            p = int(p * num_lab)

        if p > Y.shape[0]:
            p = Y.shape[0]
            LOG.warn("Warning: p greater than the number of samples",
                     LOG.ll.CLASSIFIER)
        #W = gutils.scipy_to_np(W)
        #W =  0.5* (W + W.T)
        L = gutils.lap_matrix(W)
        D = gutils.deg_matrix(W, flat=True, pwr=-1.0)

        L = 0.5 * (L + L.T)

        def check_symmetric(a, tol=1e-8):
            return np.allclose(a, a.T, atol=tol)

        def is_pos_sdef(x):
            return np.all(np.linalg.eigvals(x) >= -1e-06)

        import scipy.sparse
        sym_err = L - L.T
        sym_check_res = np.all(np.abs(sym_err.data) < 1e-7)  # tune this value
        assert sym_check_res
        """---------------------------------------------------------------------------------------------------
                EIGENFUNCTION EXTRACTION
        ---------------------------------------------------------------------------------------------------
        """
        import time
        start_time = time.time()
        eigenVectors, eigenValues = W.load_eigenfunctions(p)

        time_elapsed = time.time() - start_time
        LOG.info("Took {} seconds to calculate eigenvectors".format(
            int(time_elapsed)))
        U = eigenVectors
        LAMBDA = eigenValues
        """
        -------------------------------------------------------------------------
            Import and setup Tensorflow
        ------------------------------------------------------------------------------
        """
        import tensorflow as tf
        import pandas as pd
        import tf_labelprop.gssl.classifiers.lgc_lvo_aux as aux
        gpus = tf.config.experimental.list_physical_devices('GPU')

        #tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024*8)])
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        """
        -------------------------------------------------------------------------
            Define Constants on GPU
        ------------------------------------------------------------------------------
        """
        U, X, Y = [tf.constant(x.astype(np.float32)) for x in [U, X, Y]]
        _U_times_U = tf.multiply(U, U)
        N = X.shape[0]
        """
            -----------------------------------------------------------------------------
            DEFINE VARS
            --------------------------------------------------------------------------------
        """
        MU = tf.Variable(0.1, name="MU")

        LAMBDA = tf.constant(LAMBDA.astype(np.float32), name="LAMBDA")
        PI = tf.Variable(tf.ones(shape=(tf.shape(Y)[0], ), dtype=tf.float32),
                         name="PI")
        _l = LAMBDA.numpy()
        """
            -----------------------------------------------------------------------------
            DEFINE FORWARD
            --------------------------------------------------------------------------------
        """

        def forward(Y, U, PI, mode='train', p=None, remove_diag=True):
            if p is None:
                p = 99999

            pi_Y = aux.spd_matmul(aux.to_sp_diag(tf.abs(PI)), Y)

            alpha = self.get_alpha(MU)
            """
                Maybe apply custom convolution to LAMBDA, otherwise just fit LGC's alpha using the corresponding filter 1/(1-alpha + alpha*lambda)
            """
            #tf.print(alpha)
            a = alpha - alpha * LAMBDA
            lambda_tilde = 1 / (1 - a)
            """ Set entries corresponding to eigvector e_i to zero for i > p """
            lambda_tilde = tf.where(
                tf.less_equal(tf.range(0, lambda_tilde.shape[0]), p),
                lambda_tilde, 0 * lambda_tilde)

            _self_infl = aux.mult_each_row_by(
                tf.square(U), by=lambda_tilde
            )  #Square each element of U, then dot product of each row with lambda_tilde
            B = _self_infl
            _self_infl = tf.reduce_sum(_self_infl, axis=1)

            A = aux.mult_each_col_by((tf.transpose(U) @ pi_Y), by=lambda_tilde)
            _P_op = U @ (A)
            if not remove_diag:
                _diag_P_op = tf.zeros_like(
                    aux.mult_each_col_by(pi_Y, by=_self_infl))
            else:
                _diag_P_op = aux.mult_each_col_by(pi_Y, by=_self_infl)

            if mode == 'eval':
                return aux.divide_by_row(_P_op - _diag_P_op)
            else:
                return A, B, aux.divide_by_row(_P_op - _diag_P_op)

        def forward_eval(Y, U, PI, mode='train', p=None, remove_diag=True):
            if p is None:
                p = 99999

            pi_Y = aux.spd_matmul(aux.to_sp_diag(tf.abs(PI)), Y)

            alpha = self.get_alpha(MU)
            """
                Maybe apply custom convolution to LAMBDA, otherwise just fit LGC's alpha using the corresponding filter 1/(1-alpha + alpha*lambda)
            """
            #tf.print(alpha)
            a = alpha - alpha * LAMBDA
            lambda_tilde = 1 / (1 - a)
            """ Set entries corresponding to eigvector e_i to zero for i > p """
            lambda_tilde = tf.where(
                tf.less_equal(tf.range(0, lambda_tilde.shape[0]), p),
                lambda_tilde, 0 * lambda_tilde)

            _self_infl = aux.mult_each_row_by(
                tf.square(U), by=lambda_tilde
            )  #Square each element of U, then dot product of each row with lambda_tilde
            _self_infl = tf.reduce_sum(_self_infl, axis=1)

            A = aux.mult_each_col_by((tf.transpose(U) @ pi_Y), by=lambda_tilde)
            _P_op = U @ (A)
            if not remove_diag:
                _diag_P_op = tf.zeros_like(
                    aux.mult_each_col_by(pi_Y, by=_self_infl))
            else:
                _diag_P_op = aux.mult_each_col_by(pi_Y, by=_self_infl)

            return aux.divide_by_row(_P_op - _diag_P_op)

        """
            -----------------------------------------------------------------------------
                DEFINE LOSSES and learning schedule
            --------------------------------------------------------------------------------
        """
        losses = {
            'xent':
            lambda y_, y: tf.reduce_mean(-tf.reduce_sum(y_ * tf.cast(
                tf.math.log(aux.smooth_labels(y, factor=0.01)), tf.float32),
                                                        axis=[1])),
            'sq_loss':
            lambda y_, y: tf.reduce_mean(
                tf.reduce_sum(tf.square(y_ - y), axis=[1])),
            'abs_loss':
            lambda y_, y: tf.reduce_mean(
                tf.reduce_sum(tf.abs(y_ - y), axis=[1])),
            'hinge':
            lambda y_, y: tf.reduce_mean(
                tf.reduce_sum(tf.maximum(1. - y_ * y, tf.zeros_like(y)),
                              axis=1))
        }

        NUM_ITER = 10
        Y_l = tf.gather(Y, indices=np.where(labeledIndexes)[0], axis=0)
        U_l = tf.gather(U, indices=np.where(labeledIndexes)[0], axis=0)
        PI_l = tf.gather(PI, indices=np.where(labeledIndexes)[0], axis=0)
        """
            -----------------------------------------------------------------------------
            LEARNING
            --------------------------------------------------------------------------------
        """
        L = []
        df = pd.DataFrame()
        max_acc, min_loss = [0, np.inf]
        best_p = np.inf
        for i in range(NUM_ITER, 0, -1):
            MU.assign(i)

            A, B, _ = forward(Y_l, U_l, PI_l, mode='train')

            a1 = np.zeros_like(Y_l)
            a2 = np.zeros_like(Y_l)

            for i1 in range(p):
                a2 += aux.mult_each_col_by(X=Y_l, by=B[:, i1])
                a1 += aux.mult_each_col_by(
                    np.tile(A[i1, :][None, :], [a1.shape[0], 1]), U_l[:, i1])

                pred_L = aux.divide_by_row(a1 - a2)

                loss_sq = losses['sq_loss'](pred_L, Y_l)
                loss = losses['xent'](pred_L, Y_l)

                loss_xent = losses['xent'](pred_L, Y_l)

                acc = aux.accuracy(Y_l, pred_L)
                _not_lab = np.where(np.logical_not(labeledIndexes))[0]

                if self.DEBUG:
                    acc_true = aux.accuracy(
                        tf.gather(ORACLE_Y, indices=_not_lab, axis=0),
                        tf.gather(forward_eval(Y, U, PI, mode='eval', p=i1),
                                  indices=_not_lab,
                                  axis=0))
                    prop = np.max(
                        pd.value_counts(tf.argmax(pred_L, 1).numpy(),
                                        normalize=True).values)
                else:
                    acc_true = 0
                    prop = 0

                L.append(
                    np.array(
                        [i, i1, loss_sq, loss, loss_xent, acc, acc_true,
                         prop])[None, :])
                if (max_acc < acc) or (acc == max_acc and min_loss > loss):
                    print(
                        f"acc: {acc}, p: {i1}, Mu: {int(MU.numpy())}, alpha: {self.get_alpha(MU.numpy()).numpy()}"
                    )
                    best_p = int(i1)
                    best_MU = int(MU.numpy())
                    max_acc = acc
                    min_loss = loss.numpy()
                    """
                    if self.DEBUG:
                        alpha = self.get_alpha(MU)
                        I = np.identity(Y.shape[0], dtype = np.float32)
                        match_true = tf.gather(np.linalg.inv(I- alpha*(I - gutils.lap_matrix(W,'sym')))@Y,_not_lab,axis=0)
                        F = forward_eval(Y,U,PI,mode='eval',p=best_p)
                        
                        match_approx = tf.gather(F,indices=_not_lab,axis=0)
                        match = aux.accuracy(match_true, match_approx)
                        
                        print(f"Match rate {np.round(100*match,3)} ")
                        print(f"LGC_acc = {np.round(100*aux.accuracy(match_true,tf.gather(ORACLE_Y,indices=_not_lab,axis=0)),3)} ")
                        print(f"LGCLVO_acc = {np.round(100*aux.accuracy(match_approx,tf.gather(ORACLE_Y,indices=_not_lab,axis=0)),3)} ")
                    """

            if i % 1 == 0:
                """ Print info """
                if not hook is None:
                    if self.hook_iter_mode == "labeled":
                        plot_y = np.zeros_like(Y)
                        plot_y[labeledIndexes] = Y_l.numpy()
                    else:
                        MU.assign(best_MU)
                        plot_y = tf.clip_by_value(
                            forward(Y, U, PI, p=best_p, mode='eval'), 0,
                            999999).numpy()

                    hook._step(step=i,
                               X=X,
                               W=W,
                               Y=plot_y,
                               labeledIndexes=labeledIndexes)
                alpha = self.get_alpha(MU)

                LOG.info(
                    f"Acc: {max_acc.numpy():.3f};  Loss: {loss.numpy():.3f}; alpha = {alpha.numpy():.3f};"
                )

        if self.DEBUG:
            df = pd.DataFrame(np.concatenate(L, axis=0),
                              index=range(len(L)),
                              columns=[
                                  'i', 'p', 'loss_sq', 'loss', 'loss_xent',
                                  'acc', 'acc_true', 'prop'
                              ])
            self.create_3d_mesh(df)

        print(f"BEst mu: {best_MU}; best p: {best_p}")
        MU.assign(best_MU)
        print(MU)

        return forward_eval(Y, U, PI, mode='eval', p=None).numpy()
        """
        ----------------------------------------------------
            PART 2 (experimental; unreachable after the return above)
        -------------------------------------------------
        """

        opt = tf.keras.optimizers.Adam(0.05)

        max_acc = 0
        for i in range(7000):
            #MU.assign(i)
            with tf.GradientTape() as t:
                _, _, pred_L = forward(Y_l,
                                       U_l,
                                       tf.gather(
                                           PI,
                                           indices=np.where(labeledIndexes)[0],
                                           axis=0),
                                       mode='train',
                                       p=best_p)
                loss_sq = losses['sq_loss'](pred_L, Y_l)
                loss = losses['xent'](pred_L, Y_l)

                loss_xent = losses['xent'](pred_L, Y_l)

            acc = aux.accuracy(Y_l, pred_L)
            _not_lab = np.where(np.logical_not(labeledIndexes))[0]
            acc_true = aux.accuracy(
                tf.gather(ORACLE_Y, indices=_not_lab, axis=0),
                tf.gather(forward(Y, U, PI, mode='eval')[0],
                          indices=_not_lab,
                          axis=0))

            L.append(
                np.array([i, loss_sq, loss, loss_xent, acc,
                          acc_true])[None, :])
            """
                Project labels such that they sum up to the original amount
            """
            pi = PI.numpy()
            pi[labeledIndexes] = np.sum(
                labeledIndexes) * pi[labeledIndexes] / (np.sum(
                    pi[labeledIndexes]))
            PI.assign(pi)
            """
                TRAINABLE VARIABLES GO HERE
            """
            trainable_variables = []
            if optimize_labels:
                trainable_variables.append(PI)
            """
                Apply gradients
            """
            gradients = t.gradient(loss, trainable_variables)
            opt.apply_gradients(zip(gradients, trainable_variables))

            if acc > max_acc:
                print(max_acc)
                best_trainable_variables = [
                    k.numpy() for k in trainable_variables
                ]
                max_acc = acc
                min_loss = loss
                counter_since_best = 0

        for k in range(len(trainable_variables)):
            trainable_variables[k].assign(best_trainable_variables[k])

        return forward(Y, U, PI, mode='eval', p=None).numpy()
        """
        
        for c in df.columns:
            if c.startswith('loss'):
                df[c] = (df[c] - df[c].min())/(df[c].max()-df[c].min())
        
        for c in df.columns:
            if not c in 'i':
                plt.plot(df['i'],df[c],label=c)
        plt.legend()
        plt.show()
        
        #plt.scatter(range(lambda_tilde.shape[0]),np.log10(lambda_tilde/LAMBDA),s=2)
        #plt.show()
        """
        return tf.clip_by_value(forward(Y, U, PI, mode='eval')[0], 0,
                                999999).numpy()
Code example #10
def intcomp_demo():

    LOG.info("Demonstração para Inteligência Computacional")
    ds, alg = "", ""
    
    while ds not in ['mnist','isolet','g241c']: 
        LOG.info("Which DATASET? (mnist, isolet or g241c)")
        ds = input().lower()
    while alg not in ['L','D','N']: 
        LOG.info("Which LGCLVO? (L, D or N[one])")
        alg = input().upper()
    
    import tf_labelprop.experiment.specification.specification_bits as spec
    from tf_labelprop.experiment.specification.specification_bits import allPermutations as P
    
    from tf_labelprop.experiment.specification.specification_skeleton import EmptySpecification
    class ExpIntComp(EmptySpecification):
        CACHE_AFFMAT = False
        def __init__(self,ds,alg):
            self.ds = ds
            self.alg = alg

        def get_spec_name(self):
            return "INTCOMP"
        
        def generalConfig(self):
            s = spec.GENERAL_DEFAULT
            s['id'] = [1]
            return P(s)
        
        def inputConfig(self):
            if self.ds == 'mnist':
                s = spec.INPUT_MNIST
                s['labeled_percent'] = [100/70000]
            elif self.ds=="isolet":
                s = spec.INPUT_ISOLET
            else:
                s = spec.INPUT_CHAPELLE_A
                s["use_chapelle_splits"] = [True]
                s['num_labeled'] = [10]
                s['benchmark'] = ['digit1']
            return P(s)
    
        def filterConfig(self):
            def alpha_to_mu(alpha):
                return (1-alpha)/alpha
            if self.alg in ["L"]:
                s = spec.FILTER_LGC_LVO_AUTO
                s['LGC_iter'] = [1000]
            else:
                s = spec.FILTER_NOFILTER
            
            return  P(s)
        
        
        def noiseConfig(self):
            s = spec.NOISE_UNIFORM_DET_SOME
            s["corruption_level"] = [0.0 if self.ds == "mnist" else 0.0]
            return P(s)
        
        def affmatConfig(self):
            if self.ds == "g241c":
                s = spec.AFFMAT_DEFAULT
                s['k'] = [500]
            elif self.ds=="mnist":
                s = spec.AFFMAT_DEFAULT
                s['k'] = [15]
            elif self.ds == "isolet":
                s = spec.AFFMAT_ISOLET
            else:
                s = spec.AFFMAT_DEFAULT
            
            return P(s)
        def algConfig(self):
            if self.alg=="D":
                s =spec.ALGORITHM_LGCLOO_DEFAULT
                s["optimize_labels"] = [False]
                s["custom_conv"] = [False]
                
                s['p'] = [1500 if self.ds == 'isolet' else 50]
                return P(s)
            s = spec.ALGORITHM_SIIS_DEFAULT
            s['m'] = [100]
            #s['mu'] = [(1-0.9)/0.9]
            #s['alpha'] = [0.9]
            #s['num_iter'] = [1000.0]
            def alpha_to_mu(alpha):
                return (1-alpha)/alpha
            #s["num_iter"] = [1000]
            return P(s)

    opt = ExpIntComp(ds=ds,alg=alg).get_all_configs()[0]
    
    ExperimentRun(opt).run(hook_list=[])
Code example #11
 def _log(msg):
     LOG.info(msg,LOG.ll.EXPERIMENT)
Code example #12
    def run(self,hook_list=PLOT_HOOKS):
        for k,v in self.args.items():
            LOG.info("{}:{}".format(k,v),LOG.ll.EXPERIMENT)
        
        
        #Multiplex the arguments, allocating each to the correct step
        mplex = postprocess(keys_multiplex(self.args))
        
        
        #Get Hooks:
        hooks = select_and_add_hook(hook_list, mplex, self)
        
        
        
        
        LOG.info("Step 1: Read Dataset",LOG.ll.EXPERIMENT)
        
        #Select Input 
        self.X,self.W,  self.Y_true, self.labeledIndexes = select_input(**mplex["INPUT"])
        
        if self.W is None:
            self.W = select_affmat(**mplex["AFFMAT"]).generateAffMat(self.X,hook=hooks["AFFMAT"])
        
        
        
        if "know_estimated_freq" in mplex["ALG"].keys():
            if mplex["ALG"]['know_estimated_freq']:
                mplex["ALG"]["use_estimated_freq"] = np.sum(self.Y_true,axis=0) / self.Y_true.shape[0]
            mplex["ALG"].pop("know_estimated_freq")
            
        if "know_estimated_freq" in mplex["FILTER"].keys():
            if mplex["ALG"]['know_estimated_freq']:
                mplex["FILTER"]["use_estimated_freq"] = np.sum(self.Y_true,axis=0) / self.Y_true.shape[0]
            mplex["FILTER"].pop("know_estimated_freq")
            
            
        
        
        
        
        LOG.info("Step 2: Apply Noise",LOG.ll.EXPERIMENT)
        #Apply Noise
        self.Y_noisy = select_noise(**mplex["NOISE"]).corrupt(self.Y_true, self.labeledIndexes,hook=hooks["NOISE"])
        



        
        LOG.info("Step 3: Create Affinity Matrix",LOG.ll.EXPERIMENT)
        #Generate Affinity Matrix
        self.W = select_affmat(**mplex["AFFMAT"]).generateAffMat(self.X,hook=hooks["AFFMAT"])
        
        
        
        LOG.info("Step 4: Filtering",LOG.ll.EXPERIMENT)
        #Create Filter
        ft = select_filter(**mplex["FILTER"])
        self.ft = ft

        
        noisyIndexes = (np.argmax(self.Y_true,axis=1) != np.argmax(self.Y_noisy,axis=1))
        
        self.Y_filtered, self.labeledIndexes_filtered = ft.fit(self.X, self.Y_noisy, self.labeledIndexes, self.W, hook=hooks["FILTER"])
        
        
        LOG.info("Step 5: Classification",LOG.ll.EXPERIMENT)
        #Select Classifier 
        alg = select_classifier(**mplex["ALG"])
        #Get Classification
        self.F = alg.fit(self.X,self.W,self.Y_filtered,self.labeledIndexes_filtered,hook=hooks["ALG"])
        
        
        LOG.info("Step 6: Evaluation",LOG.ll.EXPERIMENT)
        LOG.debug("ALGORITHM settings:{}".format(mplex["ALG"]["algorithm"]),LOG.ll.EXPERIMENT)
        
        """ Accuracy. """
        acc = gutils.accuracy(gutils.get_pred(self.F), gutils.get_pred(self.Y_true))
        
        
        acc_unlabeled = gutils.accuracy(gutils.get_pred(self.F)[np.logical_not(self.labeledIndexes)],\
                                         gutils.get_pred(self.Y_true)[np.logical_not(self.labeledIndexes)])
        acc_labeled = gutils.accuracy(gutils.get_pred(self.F)[self.labeledIndexes],\
                                         gutils.get_pred(self.Y_true)[self.labeledIndexes])
        
        
        CMN_acc = gutils.accuracy(gutils.get_pred(gutils.class_mass_normalization(self.F,self.Y_filtered,self.labeledIndexes,normalize_rows=True)), gutils.get_pred(self.Y_true))
      
        
        """
            Log accuracy results and update output dictionary
        """
        def _log(msg):
            LOG.info(msg,LOG.ll.EXPERIMENT)
            
        _log("Accuracy: {:.3%} | {:.3%}".format(acc,1-acc))
        _log("Accuracy (unlabeled): {:.3%} |{:.3%}".format(acc_unlabeled,1-acc_unlabeled))
        _log("Accuracy (labeled): {:.3%} | {:.3%}".format(acc_labeled,1-acc_labeled))    
        _log("Accuracy w/ CMN: {:.3%} | {:.3%}".format(CMN_acc,1-CMN_acc))
        
        self.out_dict.update({OUTPUT_PREFIX + "acc" :acc})
        self.out_dict.update({OUTPUT_PREFIX + "acc_unlabeled" :acc_unlabeled})
        self.out_dict.update({OUTPUT_PREFIX + "acc_labeled" :acc_labeled})
        self.out_dict.update({OUTPUT_PREFIX + "CMN_acc" :CMN_acc})
        
        
        
        return self.out_dict
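
CMN (class mass normalization, from Zhu et al.'s label propagation work) rescales each class column of F so that the predicted class masses match the class priors estimated from the labeled set; a hedged sketch of what gutils.class_mass_normalization presumably computes:

import numpy as np

def class_mass_normalization(F, Y, labeledIndexes, normalize_rows=True):
    """Scale column j of F by prior_j / mass_j, then optionally renormalize rows (sketch)."""
    priors = Y[labeledIndexes].sum(axis=0) / Y[labeledIndexes].sum()
    mass = F.sum(axis=0) + 1e-12
    F = F * (priors / mass)[None, :]
    if normalize_rows:
        F = F / (F.sum(axis=1, keepdims=True) + 1e-12)
    return F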
Code example #13
def transition_count_mat(Y, A):
    """ Obtains a transition count matrix for uniform noise,
    indicating how many instances should have their label flipped and to which class.
    
    Specifically, this returns a matrix M such that :math:`M[i,j]` is the number of instances of i-th class to be swapped
    to the j-th class.
    
    Args:
        Y (`NDArray[int].shape[N,C]`) : Matrix encoding initial beliefs.
        A (`NDArray[float].shape[C,C]`): Transition probabilities between each class.
    Returns:
        `NDArray[int].shape[C,C]`: the transition count matrix.
    
    """
    c = Y.shape[1]
    class_freq = [int(round(sum(Y[:, i]))) for i in range(c)]
    num_clean = int(np.round(sum([class_freq[i] * A[i, i] for i in range(c)])))

    LOG.info(
        "NUM CLEAN:{};NUM NOISY:{};TOTAL:{}".format(
            num_clean,
            np.sum(class_freq) - num_clean, np.sum(class_freq)), LOG.ll.NOISE)
    """ Little procedure that allocates clean labels according to prob. of each diagonal entry """
    import heapq
    B = np.zeros((c, ))  #Soon to be our diagonal
    H = [(-A[i, i] * class_freq[i], i) for i in range(c)]
    heapq.heapify(H)

    for i in range(num_clean):
        x = heapq.heappop(H)
        id = x[1]
        val = x[0]
        B[id] += 1
        heapq.heappush(H, (val + 1, id))
    """ We approximate the "most commonly observed" scenario by rounding """
    #Fix possible isues
    for i in range(c):
        A[i, :] = np.round(A[i, :] * (class_freq[i] - num_clean))
        A[i, i] = B[i]

    A = A.astype(np.int32)

    observed_class_counts = np.asarray([A[i, i] for i in range(c)])
    expected_class_counts = np.asarray(
        [np.sum(np.argmax(Y, axis=1) == i) for i in range(c)])

    INFTY = A.shape[0]**2
    for i in range(c):
        S = sum(A[i, :]) - class_freq[i]
        if S > 0:
            for s in range(S):
                diff = observed_class_counts - expected_class_counts
                diff[i] = -INFTY
                diff[A[i, :] == 0] = -2 * INFTY
                j = np.random.choice(np.flatnonzero(diff == diff.max()))
                observed_class_counts[j] -= 1
                A[i, j] -= 1
        elif S < 0:
            for s in range(-S):
                diff = expected_class_counts - observed_class_counts
                diff[i] = -INFTY
                j = np.random.choice(np.flatnonzero(diff == diff.max()))
                observed_class_counts[j] += 1
                A[i, j] += 1
        assert sum(A[i, :]) - class_freq[i] == 0

    return A
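
A quick usage sketch on a 2-class problem with symmetric uniform noise (the uniform transition matrix A = (1-p)*I + p/(C-1)*(ones - I) is an assumption consistent with the docstring; note the function modifies A in place, hence the copy):

import numpy as np

c, p = 2, 0.2                                 # two classes, 20% label noise
Y = np.eye(c)[np.repeat([0, 1], [60, 40])]    # 60 class-0 and 40 class-1 one-hot labels
A = (1 - p) * np.eye(c) + (p / (c - 1)) * (np.ones((c, c)) - np.eye(c))
counts = transition_count_mat(Y, A.copy())
print(counts, counts.sum(axis=1))             # row i sums to class i's frequency (asserted above)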
Code example #14
def apply_noise(Y, labeledIndexes, A, seed=None, deterministic=True):
    """ Corrupts a set percentage of initial labels with noise.
    
    Args:
        Y (`NDArray[int].shape[N,C]`) : Matrix encoding initial beliefs.
        A (`NDArray[float].shape[C,C]`): Transition probabilities between each class.
        labeledIndexes (`NDArray[bool].shape[N]`) : determines which indices are to be considered as labeled.
        seed (int) : Optional. Used to reproduce results. 
        
    Returns:
        `NDArray[int].shape[N,C]` : Belief matrix after corruption.
        
    """
    np.random.seed(seed)
    old_A = np.copy(np.asarray(A))
    if not np.all(old_A <= 1):
        LOG.debug(old_A, LOG.ll.NOISE)
        raise Exception("trans. mat has value >1")
    old_Y = np.copy(Y)
    is_flat = np.ndim(Y) == 1
    if is_flat:
        Y = gutils.init_matrix(Y, labeledIndexes)
    c = Y.shape[1]
    n = Y.shape[0]

    Y = Y[labeledIndexes, :]
    Y_flat = np.argmax(Y, axis=1)

    vec = np.random.RandomState(seed).permutation(Y.shape[0])
    assert vec is not None
    cursor = np.zeros((c), dtype=np.int32)

    if deterministic:
        A = transition_count_mat(Y, A)
    else:

        class_freq = [int(np.sum(Y[:, i])) for i in range(c)]

        num_clean = np.sum(labeledIndexes) * sum(
            [old_A[i, i] for i in range(c)]) / c

        num_clean = int(np.round(num_clean))
        num_noisy = np.sum(labeledIndexes) - num_clean

        ##########
        perm = np.random.permutation(Y.shape[0])[0:num_noisy]
        A = np.zeros((c, c))
        for i in range(c):
            A[i, i] = class_freq[i]

        for my_id in perm:
            j = np.argmax(Y[my_id, :])
            A[j, j] -= 1
            new_j = j
            while new_j == j:
                new_j = np.random.choice(c)
            A[j, new_j] += 1

        assert np.sum(A) == np.sum(labeledIndexes)
        LOG.debug(A, LOG.ll.NOISE)
        ###############

    for i in np.arange(Y_flat.shape[0]):
        current_class = Y_flat[vec[i]]
        while A[current_class, cursor[current_class]] == 0:
            cursor[current_class] += 1
            assert cursor[current_class] < c
        Y_flat[vec[i]] = cursor[current_class]
        A[current_class, cursor[current_class]] -= 1

    noisy_Y = np.zeros(shape=(n, c))
    labeledIndexes_where = np.where(labeledIndexes)[0]
    for l in range(Y_flat.shape[0]):
        noisy_Y[labeledIndexes_where[l], Y_flat[l]] = 1
    noisy_Y[np.logical_not(labeledIndexes), :] = old_Y[
        np.logical_not(labeledIndexes), :]
    LOG.info(
        "Changed {:.2%} of the labeled entries".format(
            1 - gutils.accuracy(np.argmax(Y, axis=1), Y_flat)),
        LOG.ll.NOISE)

    if is_flat:
        old_Y[labeledIndexes] = np.argmax(noisy_Y[labeledIndexes], axis=1)
        return old_Y
    else:
        return noisy_Y
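
Usage sketch with the same hypothetical uniform-noise matrix as above (deterministic=False takes the purely stochastic branch, so roughly p of the labeled entries get flipped):

import numpy as np

n, c, p = 100, 2, 0.2
Y = np.eye(c)[np.random.randint(0, c, n)]          # one-hot ground truth
labeled = np.zeros(n, dtype=bool); labeled[:50] = True
A = (1 - p) * np.eye(c) + (p / (c - 1)) * (np.ones((c, c)) - np.eye(c))

noisy = apply_noise(Y, labeled, A, seed=42, deterministic=False)
flips = np.argmax(noisy[labeled], 1) != np.argmax(Y[labeled], 1)
print(flips.mean())                                # close to p = 0.2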
Code example #15
File: clgc.py (project: brunoklaus/tf-labelprop-BR)
    def __LGC(self,
              X,
              W,
              Y,
              labeledIndexes,
              alpha=0.1,
              useEstimatedFreq=None,
              hook=None):
        """ Init """
        import scipy.sparse
        if scipy.sparse.issparse(W):
            W = W.todense()
        Y = self.CLEAN_UNLABELED_ROWS(Y, labeledIndexes)
        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")
        """ Estimate frequency of classes"""
        num_labeled = Y[labeledIndexes].shape[0]
        num_classes = Y.shape[1]
        if useEstimatedFreq is not None:
            if isinstance(useEstimatedFreq, bool):
                estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
            else:
                estimatedFreq = useEstimatedFreq

        else:
            estimatedFreq = np.repeat(1 / num_classes, num_classes)
        omega = estimatedFreq
        """  """
        mu = (1 - alpha) / alpha
        n = Y.shape[0]
        c = Y.shape[1]

        I = np.identity(Y.shape[0])
        S = I - gutils.lap_matrix(W, which_lap='sym')
        """ stuff that has matrix multiplication with theta """
        theta = (1 / mu) * np.asarray(np.linalg.inv(I - alpha * S))
        F_lgc = (theta @ Y) * mu
        theta_1n = np.sum(theta, axis=1).flatten()
        theta_1n_ratio = (theta_1n /
                          (np.sum(theta_1n)))[:, np.newaxis]  #Shape: nx1
        LOG.debug(theta_1n_ratio.shape, LOG.ll.CLASSIFIER)  # expected (n, 1)
        """ Intermediate calc """
        zeta = n * omega - np.sum(F_lgc, axis=0)  #Shape: 1xc
        zeta = np.reshape(zeta, (1, c))

        ypsilon = np.ones(shape=(n,1)) - np.sum(F_lgc,axis=1)[:,np.newaxis] -\
             theta_1n_ratio * (n - np.sum(F_lgc.flatten())) #Shape: nx1

        F = F_lgc
        F += theta_1n_ratio @ zeta
        F += (1 / c) * (ypsilon @ np.ones((1, c)))

        log_args = [
            np.round(x, 3)
            for x in [np.sum(F, axis=1)[0:10],
                      np.sum(F, axis=0), n * omega]
        ]
        LOG.info(
            "F sum on rows: {} (expected 1,1,...,1); F sum col: {} (expected {})"
            .format(*log_args))

        return F
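
The correction terms added to F_lgc enforce two linear constraints, matching the log message above (sketched in LaTeX; omega_j is the estimated proportion of class j):

% Constrained LGC: the returned F should satisfy
%   (i)  each row sums to 1, and
%   (ii) column j sums to n * omega_j
\sum_{j=1}^{c} F_{ij} = 1 \quad \forall i,
\qquad
\sum_{i=1}^{n} F_{ij} = n\,\omega_j \quad \forall j.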
Code example #16
    def LGCLVO(self,
               X,
               W,
               Y,
               labeledIndexes,
               mu=99.0,
               useEstimatedFreq=True,
               tuning_iter=0,
               hook=None,
               constant_prop=False,
               useZ=True,
               normalize_rows=True):

        labeledIndexes, noisyIndexes = labeledIndexes

        Y = np.copy(Y)
        #We make a deep copy of labeledindexes
        labeledIndexes = np.array(labeledIndexes)
        lids = np.where(labeledIndexes)[0]
        if Y.ndim == 1:
            Y = gutils.init_matrix(Y, labeledIndexes)
        Y[np.logical_not(labeledIndexes), :] = 0

        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")

        W = 0.5 * (W + W.transpose())

        num_labeled = Y[labeledIndexes].shape[0]
        num_unlabeled = Y.shape[0] - num_labeled
        num_classes = Y.shape[1]

        D = gutils.deg_matrix(W, flat=True)
        if useEstimatedFreq is not None:
            if isinstance(useEstimatedFreq, bool):
                estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
            else:
                estimatedFreq = useEstimatedFreq

        else:
            estimatedFreq = np.repeat(1 / num_classes, num_classes)

        if scipy.sparse.issparse(W):
            l = np.sum(labeledIndexes)

            itertool_prod = [[i, j] for i in range(l) for j in range(l)]

            row = np.asarray([lids[i] for i in range(l)])
            col = np.asarray([i for i in range(l)])
            data = np.asarray([1.0] * l)
            temp_Y = _to_np(
                scipy.sparse.coo_matrix((data, (row, col)),
                                        shape=(W.shape[0], l)))

            PL = LGC_iter_TF(X,
                             W,
                             Y=temp_Y,
                             labeledIndexes=labeledIndexes,
                             alpha=1 / (1 + mu),
                             num_iter=1000)

            PL = PL[labeledIndexes, :]
            PL[range(PL.shape[0]), range(PL.shape[0])] = 0  #Set diagonal to 0

            del temp_Y

            row = np.asarray(
                [lids[x[0]] for x in itertool_prod if x[0] != x[1]])
            col = np.asarray(
                [lids[x[1]] for x in itertool_prod if x[0] != x[1]])
            data = [PL[x[0], x[1]] for x in itertool_prod if x[0] != x[1]]
            P = scipy.sparse.coo_matrix((data, (row, col)),
                                        shape=W.shape).tocsr()
        else:
            #Identity matrix
            I = np.identity(W.shape[0])
            #Get graph laplacian
            L = gutils.lap_matrix(W, which_lap='sym')
            #Propagation matrix
            P = np.zeros(W.shape)
            P[np.ix_(labeledIndexes,
                     labeledIndexes)] = np.linalg.inv(I + 0.5 *
                                                      (L + L.transpose()) /
                                                      mu)[np.ix_(
                                                          labeledIndexes,
                                                          labeledIndexes)]
            P[labeledIndexes, labeledIndexes] = 0
            P[np.ix_(labeledIndexes, labeledIndexes)] = P[np.ix_(
                labeledIndexes, labeledIndexes)] / np.sum(P[np.ix_(
                    labeledIndexes, labeledIndexes)],
                                                          axis=0,
                                                          keepdims=False)

        W = scipy.sparse.csr_matrix(W)

        Z = []

        detected_noisylabels = []
        suggested_labels = []
        where_noisylabels = []
        Q_values = []


        def divide_row_by_sum(e):
            e = _to_np(e)
            if normalize_rows:
                e = e / np.sum(e + 1e-100, axis=1, keepdims=True)
            return e

        def find_argmin(Q, class_to_unlabel):
            id_min_line = np.argmin(Q[:, class_to_unlabel])
            id_min_col = class_to_unlabel
            return id_min_line, id_min_col, Q[id_min_line, id_min_col]

        #######################################################################################
        '''BEGIN iterations'''

        Q = None
        cleanIndexes = np.copy(labeledIndexes)
        for i_iter in range(tuning_iter):

            found_noisy = True

            if np.sum(labeledIndexes) > 0 and found_noisy:
                '''Z matrix - The binary values of current Y are replaced with their corresponding D entries.
                    Then, we normalize each row so that it sums to its estimated influence
                '''
                if (not self.use_baseline) or Q is None:
                    if useZ:
                        Z = gutils.calc_Z(Y,
                                          labeledIndexes,
                                          D,
                                          estimatedFreq,
                                          weigh_by_degree=False)
                        F = P @ Z
                        if scipy.sparse.issparse(F):
                            F = np.asarray(F.toarray())

                        #Compute graph gradient
                        Q = divide_row_by_sum(F) - divide_row_by_sum(Z)
                    else:
                        F = P @ Y
                        if scipy.sparse.issparse(F):
                            F = np.asarray(F.toarray())
                        Q = divide_row_by_sum(F) - divide_row_by_sum(Y)

                #During label tuning, we'll also 'unlabel' the argmax
                unlabeledIndexes = np.logical_not(cleanIndexes)
                if self.early_stop:
                    Q[np.sum(F, axis=1) == 0.0, :] = 9999

                Q[unlabeledIndexes, :] = np.inf

                #Find minimum unlabeled index
                if constant_prop:
                    expectedNumLabels = estimatedFreq * np.sum(labeledIndexes)
                    actualNumLabels = np.sum(Y[labeledIndexes, :], axis=0)
                    temp = expectedNumLabels - actualNumLabels
                    class_priority = np.argsort(temp)

                    found_noisy = False
                    for class_to_unlabel in class_priority:
                        id_min_line, id_min_col, val = find_argmin(
                            Q, class_to_unlabel)
                        if val < 0:
                            #This means that the class would have a different label under the modified label prop
                            found_noisy = True
                            break

                else:
                    id_min = np.argmin(Q)
                    id_min_line = id_min // num_classes
                    id_min_col = id_min % num_classes  #The class previously assigned to instance X_{id_min_line}
                    found_noisy = Q[id_min_line, id_min_col] < 0

                if found_noisy:

                    id_max_col = np.argmax(
                        Q[id_min_line, :])  #The new, suggested class

                    detected_noisylabels.append(id_min_col)
                    where_noisylabels.append(id_min_line)

                    suggested_labels.append(id_max_col)
                    Q_values.append(1 + Q[id_min_line, id_min_col])

                    #Unlabel OP
                    if not labeledIndexes[id_min_line]:
                        raise Exception(
                            "Error: unlabeled instance was selected")
                    if not Y[id_min_line, id_min_col] == 1:
                        raise Exception("Error: picked wrong class to unlabel")

                    labeledIndexes[id_min_line] = False
                    cleanIndexes[id_min_line] = False

                    Y[id_min_line, id_min_col] = 0
                    if self.relabel:
                        labeledIndexes[id_min_line] = True
                        Y[id_min_line, :] = 0
                        Y[id_min_line, id_max_col] = 1

            if hook is not None:
                hook._step(step=(i_iter + 1),
                           X=X,
                           W=W,
                           Y=Y,
                           labeledIndexes=labeledIndexes)
        '''
        Diagnostic plot: Q values vs. number of removed labels
        '''

        import matplotlib
        matplotlib.use("TkAgg")
        import matplotlib.pyplot as plt

        fig = plt.figure(figsize=(5 * 3, 2 * 3))
        ax = fig.add_subplot()
        ax.scatter(np.arange(len(Q_values)),
                   Q_values,
                   c=noisyIndexes[where_noisylabels])
        ax.set_xlabel("#Labels Removed", fontsize=22)
        ax.set_ylabel("Consistency with LGC", fontsize=22)

        ax.axvline(np.sum(noisyIndexes), color='red')

        #Change the fontsize of the tick labels
        ax.tick_params(axis='both', which='major', labelsize=18)
        ax.tick_params(axis='both', which='minor', labelsize=18)

        #Minor ticks every 0.1 on the y axis; no labels (default NullFormatter)
        ax.yaxis.set_minor_locator(matplotlib.ticker.MultipleLocator(0.1))
        fig.tight_layout()
        plt.axhline(np.max(Q_values[0:(1 + np.sum(noisyIndexes))]),
                    color='green')
        plt.grid(True, axis='y', linestyle='-', alpha=0.5, which='major')
        plt.grid(True, axis='y', linestyle='--', alpha=0.5, which='minor')

        plt.savefig(
            '/home/klaus/eclipse-workspace/NoisyGSSL/results/python_plotly/' +
            'mnist_alpha=0.99_noise=0.3_thresh_static.png')

        plt.show()
        '''END iterations'''
        LOG.info(
            "NUMBER OF DETECTED NOISY INSTANCES: {}".format(
                len(detected_noisylabels)), LOG.ll.FILTER)

        return Y, labeledIndexes
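The gradient matrix Q drives the whole loop: row-normalized propagation output minus the current labels, so a strongly negative Q[i, j] means instance i disagrees with its assigned class j. A minimal dense sketch of one detection step on a toy graph (illustrative only; gutils and the class state are replaced by plain NumPy, and the graph and labels are made up):

import numpy as np

# One LGCLVO-style detection step on a toy graph (illustrative sketch).
# Two triangles {0,1,2} and {3,4,5} joined by the bridge 2-3.
W = np.zeros((6, 6))
for a, b in [(0, 1), (0, 2), (1, 2), (3, 4), (3, 5), (4, 5), (2, 3)]:
    W[a, b] = W[b, a] = 1.0
Y = np.eye(2)[[0, 0, 0, 1, 0, 1]]        # vertex 4 deliberately mislabeled as class 0
mu = 99.0

L = np.diag(W.sum(axis=1)) - W           # combinatorial Laplacian
P = np.linalg.inv(np.eye(6) + L / mu)    # propagation matrix
np.fill_diagonal(P, 0)                   # leave-one-out: a label must not vote for itself

F = P @ Y
Q = F / (F.sum(axis=1, keepdims=True) + 1e-100) - Y   # graph gradient

i, j = np.unravel_index(np.argmin(Q), Q.shape)
print(i, j, Q[i, j])                     # -> 4 0 ...: label (4, class 0) flagged as noisy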
Code example #17
0
File: clgc.py Project: brunoklaus/tf-labelprop-BR
    def __LGC_iter_TF(self,
                      X,
                      W,
                      Y,
                      labeledIndexes,
                      alpha=0.1,
                      useEstimatedFreq=True,
                      num_iter=1000,
                      hook=None):
        from tf_labelprop.gssl.classifiers._lgc_tf import LGC_iter_TF
        """ Init """
        import scipy.sparse
        if not scipy.sparse.issparse(W):
            W = scipy.sparse.csr_matrix(W)
        Y = np.copy(Y)
        if Y.ndim == 1:
            Y = gutils.init_matrix(Y, labeledIndexes)
        Y[np.logical_not(labeledIndexes), :] = 0
        if not W.shape[0] == Y.shape[0]:
            raise ValueError("W,Y shape not compatible")
        """ Estimate frequency of classes"""
        num_labeled = Y[labeledIndexes].shape[0]
        num_classes = Y.shape[1]
        if useEstimatedFreq is not None:
            if isinstance(useEstimatedFreq, bool):
                estimatedFreq = np.sum(Y[labeledIndexes], axis=0) / num_labeled
            else:
                estimatedFreq = useEstimatedFreq

        else:
            estimatedFreq = np.repeat(1 / num_classes, num_classes)
        omega = estimatedFreq
        """  """
        mu = (1 - alpha) / alpha
        n = Y.shape[0]
        c = Y.shape[1]
        """ Propagate Y together with an all-ones column (used to obtain theta) """
        PY1 = LGC_iter_TF(X, W, np.concatenate([Y, np.ones((n, 1))], axis=1),
                          labeledIndexes, alpha, num_iter, hook)
        PY1 = np.asarray(PY1)
        F_lgc, theta_1n = (1 / mu) * PY1[:, :-1], (1 / mu) * PY1[:, -1]
        theta_1n_ratio = (theta_1n /
                          (np.sum(theta_1n)))[:, np.newaxis]  #Shape: nx1
        """ Intermediate calc """
        zeta = n * omega - np.sum(F_lgc, axis=0)  #Shape: 1xc
        zeta = np.reshape(zeta, (1, c))

        ypsilon = np.ones(shape=(n, 1)) - np.sum(F_lgc, axis=1)[:, np.newaxis] -\
             theta_1n_ratio * (n - np.sum(F_lgc))  #Shape: nx1

        F = F_lgc + theta_1n_ratio @ zeta + (1 / c) * (ypsilon @ np.ones((1, c)))

        log_args = [
            np.round(x, 3)
            for x in [np.sum(F, axis=1)[0:10],
                      np.sum(F, axis=0), n * omega]
        ]
        LOG.info(
            "F sum on rows: {} (expected 1,1,...,1); F sum col: {} (expected {})"
            .format(*log_args))

        return F
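LGC_iter_TF is used here as a black-box solver. For reference, the standard LGC update it iterates is F <- alpha * S @ F + (1 - alpha) * Y with S = D^{-1/2} W D^{-1/2}, which converges to (1 - alpha) * (I - alpha * S)^{-1} @ Y. A dense NumPy sketch of that fixed point (scaling conventions of the actual TF implementation may differ):

import numpy as np

# Dense reference for the standard LGC iteration (Zhou et al., 2004).
def lgc_dense(W, Y, alpha=0.9, num_iter=1000):
    d = W.sum(axis=1)
    S = W / np.sqrt(np.outer(d, d))            # symmetric normalization D^-1/2 W D^-1/2
    F = np.zeros_like(Y)
    for _ in range(num_iter):
        F = alpha * (S @ F) + (1 - alpha) * Y
    return F

W = np.array([[0., 1., 0.],
              [1., 0., 1.],
              [0., 1., 0.]])
Y = np.array([[1., 0.],
              [0., 0.],
              [0., 1.]])
S = W / np.sqrt(np.outer(W.sum(axis=1), W.sum(axis=1)))
F_closed = (1 - 0.9) * np.linalg.solve(np.eye(3) - 0.9 * S, Y)
print(np.allclose(lgc_dense(W, Y), F_closed))  # True: iteration reaches the fixed point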
Code example #18
0
def plotGraph(X,
              W,
              labeledIndexes,
              vertex_opt,
              plot_filepath=None,
              online=False,
              interactive=False,
              title="",
              plot_size=_plot_big,
              edge_width=0.5,
              labeled_only=False):
    """ Plots a GSSL graph.
        
        Creates a plot showing vertices connected by edges from the affinity matrix in 2D/3D.
        A number of different configurations is possible by the use of a vertexplotOpt object.
        
        Args:
            X (`NDArray[float].shape[N,D]`): A 2D or 3D matrix containing the vertex positions. 
            W (`NDArray[float].shape[N,N]`): Optional. The affinity matrix defining the graph.
            vertex_opt (vertexOptObject) : The size/color/group vertex configuration object.
            title (string, default = ``''``) : The title to be printed above the image.
            online (bool, default = ``False``) : whether to create an online plot
            interactive (bool, default = ``False``) : whether to open an interactive plot on the browser
            plot_size (`List[int].shape[2]`, default = ``[1000,1000]``) : size of the canvas for the plotting operation.
            edge_width (float, default = ``0.5``) : thickness of the edges.
        
        Raises:
            ValueError: ``if X.shape[1] not in [2, 3]``
            
        Returns:
            None
        
        """

    if plot_filepath is None:
        plot_filepath = path.join(PLOT_FOLDER,
                                  str(datetime.datetime.now()) + ".png")

    plot_dim = X.shape[1]

    if plot_dim < 2 or plot_dim > 3:
        raise ValueError("plot_dim must be either 2 or 3")
    if W is not None and W.shape[0] != X.shape[0]:
        raise ValueError("Shapes of W, X do not match")


    if not os.path.exists(os.path.dirname(plot_filepath)):
        os.makedirs(os.path.dirname(plot_filepath))

    def axis(dim_num):
        M = np.max(X[:, dim_num])
        m = np.min(X[:, dim_num])
        x = (M + m) / 2  #midpoint of the data range
        M = M + (M - x) * 0.2
        m = m + (m - x) * 0.2

        axis_cfg = dict(showline=False,
                        zeroline=False,
                        showgrid=False,
                        showticklabels=False,
                        visible=True,
                        title='',
                        range=[m, M])
        return axis_cfg

    scene = {}
    scene["xaxis"] = dict(axis(0))
    scene["yaxis"] = dict(axis(1))
    if plot_dim == 3:
        scene["zaxis"] = dict(axis(2))

    layout = go.Layout(
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgb(255,255,255)',
        title=title,
        font=dict(family='Courier New, bold', size=30, color='black'),
        width=plot_size[0],
        height=plot_size[1],
        legend=dict(x=0,
                    y=1,
                    traceorder='normal',
                    font=dict(family='sans-serif', size=30, color='#000'),
                    bgcolor='#E2E2E2',
                    bordercolor='#FFFFFF',
                    borderwidth=2),
        showlegend=True,
        xaxis=scene["xaxis"],
        yaxis=scene["yaxis"],
        margin=dict(t=100),
        hovermode='closest',
    )

    if "zaxis" in scene.keys():
        layout.update(go.Layout(zaxis=scene["zaxis"]))

    #Create Traces
    data = []

    if W is not None:
        trace_edge = _traceEdges(X=X,
                                 W=W,
                                 plot_dim=plot_dim,
                                 edge_width=edge_width)
        data = trace_edge

    trace_vertex = _traceVertex(X=X,
                                labeledIndexes=labeledIndexes,
                                plot_dim=plot_dim,
                                v_opt=vertex_opt)
    data += trace_vertex

    #Create figure
    fig = go.Figure(data=data, layout=layout)

    LOG.info("Plotting graph ({}) ...".format(title), LOG.ll.OUTPUT)
    if online:
        try:
            py.iplot(fig)
        except Exception:
            LOG.warn("Warning: Could not plot online", LOG.ll.OUTPUT)

    if interactive:
        pyoff.offline.plot(fig)
    pio.write_image(fig, plot_filepath)
    LOG.info("Plotting graph({})...Done!".format(title), LOG.ll.OUTPUT)
Code example #19
0
    """ Gets the color values for a discrete palette. """
    #palette = "husl"
    if Y.shape[0] == -1:
        raise ""
    Y = Y - np.min(Y)
    pal = sns.color_palette(palette, np.max(Y) + 10)
    #pal = [pal[2],pal[6]]
    res = 255 * np.array(list(map(lambda k: (pal[int(k)]), Y)))

    #print(res)
    return (res)


def color_scale_continuous(Y, palette="coolwarm", num_palette=70):
    """ Gets the color values for a continuous palette. """
    Y = (Y - np.min(Y))
    Y = Y / np.max(Y)
    Y[np.isnan(Y)] = 0.5

    pal = ig.AdvancedGradientPalette(sns.color_palette(palette, 7),
                                     n=num_palette + 1)
    res = 255 * np.array(
        list(map(lambda k: (pal.get(int(num_palette * k))), Y)))
    res = res.astype(np.int64)

    return res
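Both helpers return an N x 3 array of RGB values in the 0-255 range, one row per entry of Y. A quick standalone check of the discrete mapping (a simplified sketch assuming seaborn is available, as in this module's imports):

import numpy as np
import seaborn as sns

Y = np.array([0, 0, 1, 2])                               # three distinct classes
pal = sns.color_palette("husl", int(np.max(Y)) + 1)      # one color per class
colors = 255 * np.array([pal[int(k)] for k in Y])
print(colors.shape)                                      # (4, 3): one RGB row per vertex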


if __name__ == "__main__":
    LOG.info(PLOT_FOLDER, LOG.ll.OTHER)
Code example #20
0
    def rmFolders(self):
        if self.keep_images:
            return
        shutil.rmtree(os.path.join(self.filename_dir,
                                   self.temp_subfolder_name))
        LOG.info("Deleted images...", LOG.ll.HOOK)
Code example #21
0
    def _end(self, **kwargs):
        LOG.info("Best F1 score: {}".format(self.best_f1), LOG.ll.HOOK)
Code example #22
0
    def get_all_configs(self):
        """Gets the configuration for every  experiment. The corresponding prefix is added for each stage.
            Returns:
            `List[dict]` A list of all possible configs. 
        """
        g = self.generalConfig()
        for x in g:
            x["spec_name"] = self.get_spec_name()
        Z = [(g, GENERAL_PREFIX),
             (self.inputConfig(), INPUT_PREFIX),
             (self.noiseConfig(), NOISE_PREFIX),
             (self.filterConfig(), FILTER_PREFIX),
             (self.affmatConfig(), AFFMAT_PREFIX),
             (self.algConfig(), ALG_PREFIX)]
        l = [[spec.add_key_prefix(y, elem) for elem in x] for x, y in Z]
        res = list(reduce(lambda x, y: spec.comb(x, y), l))

        res = self.remove_undesirable_configs(res)
        LOG.info("Number of configurations: {}".format(len(res)))

        if self.CACHE_AFFMAT:
            res_df = pd.DataFrame(res)
            _cache_col = AFFMAT_PREFIX + "cache_dir"
            res_df[_cache_col] = '/tmp/{}'.format(self.get_spec_name())

            relevant_keys = self.get_keys_relevant_to_affmat()

            class Counter():
                def __init__(self):
                    self.counter = 0

                def inc(self):
                    self.counter += 1

                def value(self):
                    return self.counter

            COUNTER = Counter()

            def add_cache(df):

                df[_cache_col] = [
                    os.path.join(x, str(COUNTER.value()))
                    for x in df[_cache_col].values
                ]
                COUNTER.inc()
                return df

            res_df = res_df.fillna('__nan')
            res_df = res_df.groupby(relevant_keys).apply(add_cache)
            res_df = res_df.sort_values(axis=0,
                                        by=[_cache_col, GENERAL_PREFIX + 'id'])
            """    
            ----------------------------------------
                Cache Directory handling
            -----------------------------------------
            """
            if self.CLEAN_AFFMAT_DIR:
                for _dir in pd.unique(res_df[_cache_col]):
                    if os.path.isdir(_dir):
                        shutil.rmtree(_dir)
            res = list(np.array(res)[list(res_df.index)])

            _caches = res_df[_cache_col].values
            for i, dct in enumerate(res):
                dct[_cache_col] = _caches[i]

        return res
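The heart of get_all_configs is the prefix-and-combine step: each stage contributes a list of option dicts, keys get a stage prefix, and reduce(spec.comb, ...) takes the Cartesian product, merging one dict per stage into each final config. A standalone sketch of that pattern, where add_key_prefix and comb are reimplemented under the assumption that they behave this way (the project's spec module is the authority, and the prefixes below are illustrative literals):

from functools import reduce
from itertools import product

def add_key_prefix(prefix, cfg):
    # Prefix every key of a single config dict with its stage name.
    return {prefix + k: v for k, v in cfg.items()}

def comb(xs, ys):
    # Cartesian product of two config lists, merging one dict from each.
    return [{**x, **y} for x, y in product(xs, ys)]

stages = [([{"id": 1}, {"id": 2}], "GENERAL_"),
          ([{"dataset": "mnist"}], "INPUT_"),
          ([{"corruption": 0.0}, {"corruption": 0.3}], "NOISE_")]
prefixed = [[add_key_prefix(p, c) for c in cfgs] for cfgs, p in stages]
res = list(reduce(comb, prefixed))
print(len(res))   # 4 configurations (2 x 1 x 2)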