def test_inverse_transform():
    # Test FastICA.inverse_transform
    n_features = 10
    n_samples = 100
    n1, n2 = 5, 10
    rng = np.random.RandomState(0)
    X = rng.random_sample((n_samples, n_features))
    expected = {(True, n1): (n_features, n1),
                (True, n2): (n_features, n2),
                (False, n1): (n_features, n2),
                (False, n2): (n_features, n2)}
    for whiten in [True, False]:
        for n_components in [n1, n2]:
            n_components_ = (n_components if n_components is not None else
                             X.shape[1])
            ica = FastICA(n_components=n_components, random_state=rng,
                          whiten=whiten)
            with warnings.catch_warnings(record=True):
                # catch "n_components ignored" warning
                Xt = ica.fit_transform(X)
            expected_shape = expected[(whiten, n_components_)]
            assert_equal(ica.mixing_.shape, expected_shape)
            X2 = ica.inverse_transform(Xt)
            assert_equal(X.shape, X2.shape)

            # reversibility test in non-reduction case
            if n_components == X.shape[1]:
                assert_array_almost_equal(X, X2)
Example #2
def getHeartRate(window, lastHR):
    # Normalize across the window to have zero-mean and unit variance
    mean = np.mean(window, axis=0)
    std = np.std(window, axis=0)
    normalized = (window - mean) / std

    # Separate into three source signals using ICA
    ica = FastICA()
    srcSig = ica.fit_transform(normalized)

    # Find power spectrum
    powerSpec = np.abs(np.fft.fft(srcSig, axis=0))**2
    freqs = np.fft.fftfreq(WINDOW_SIZE, 1.0 / FPS)

    # Find heart rate
    maxPwrSrc = np.max(powerSpec, axis=1)
    validIdx = np.where((freqs >= MIN_HR_BPM / SEC_PER_MIN) & (freqs <= MAX_HR_BMP / SEC_PER_MIN))
    validPwr = maxPwrSrc[validIdx]
    validFreqs = freqs[validIdx]
    maxPwrIdx = np.argmax(validPwr)
    hr = validFreqs[maxPwrIdx]
    print(hr)

    #plotSignals(normalized, "Normalized color intensity")
    #plotSignals(srcSig, "Source signal strength")
    #plotSpectrum(freqs, powerSpec)

    return hr
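A minimal, hedged usage sketch for getHeartRate above. The module-level imports and constants below are assumptions chosen only so the call runs; the original project defines its own values (the constant name MAX_HR_BMP is kept exactly as used above).

import numpy as np
from sklearn.decomposition import FastICA

FPS = 30                 # assumed camera frame rate
WINDOW_SIZE = 300        # assumed window length in frames (10 s at 30 fps)
MIN_HR_BPM = 45.0
MAX_HR_BMP = 240.0
SEC_PER_MIN = 60.0

window = np.random.RandomState(0).rand(WINDOW_SIZE, 3)   # stand-in mean RGB value per frame
hr = getHeartRate(window, lastHR=None)                    # dominant frequency in Hz
print(hr * SEC_PER_MIN, "bpm")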
Example #3
    def ica(self, n_components=None):
        """Return result from independent component analysis.

        X = SA + m

        Sklearn's FastICA implementation is used.

        Parameters
        ----------
        n_components : int, optional
            Number of ICA components.

        Returns
        -------
        source : Matrix
            Estimated source matrix (S)
        mixing_matrix : Matrix
            Estimated mixing matrix (A)
        mean_vector : brede.core.vector.Vector
            Estimated mean vector

        References
        ----------
        http://scikit-learn.org/stable/modules/decomposition.html#ica

        """
        if n_components is None:
            n_components = int(np.ceil(np.sqrt(float(min(self.shape)) / 2)))

        ica = FastICA(n_components=n_components)
        sources = Matrix(ica.fit_transform(self.values), index=self.index)
        mixing_matrix = Matrix(ica.mixing_.T, columns=self.columns)
        mean_vector = Vector(ica.mean_, index=self.columns)

        return sources, mixing_matrix, mean_vector
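A hedged sketch of the X = SA + m relation described in the docstring above, using plain numpy and sklearn in place of brede's Matrix/Vector wrappers.

import numpy as np
from sklearn.decomposition import FastICA

X = np.random.RandomState(0).rand(200, 6)
ica = FastICA(n_components=3, random_state=0)
S = ica.fit_transform(X)          # estimated sources, shape (200, 3)
A = ica.mixing_.T                 # mixing matrix laid out as in the method above, shape (3, 6)
m = ica.mean_                     # estimated mean vector, shape (6,)
X_hat = S @ A + m                 # reconstruction; approximate, since 3 < 6 components
print(np.abs(X - X_hat).max())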
Example #4
def test_inverse_transform():
    """Test FastICA.inverse_transform"""
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10))
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10))
    n_features = X.shape[1]
    expected = {(True, 5): (n_features, 5),
                (True, 10): (n_features, 10),
                (False, 5): (n_features, 10),
                (False, 10): (n_features, 10)}

    for whiten in [True, False]:
        for n_components in [5, 10]:
            ica = FastICA(n_components=n_components, random_state=rng,
                          whiten=whiten)
            Xt = ica.fit_transform(X)
            expected_shape = expected[(whiten, n_components)]
            assert_equal(ica.mixing_.shape, expected_shape)
            X2 = ica.inverse_transform(Xt)
            assert_equal(X.shape, X2.shape)

            # reversibility test in non-reduction case
            if n_components == X.shape[1]:
                assert_array_almost_equal(X, X2)
Example #5
File: ica.py Project: kuntzer/sclas
class ICA(method.Method):
	
	def __init__(self, params):
		self.params = params
		self.ica = FastICA(**params)
	
	def __str__(self):
		return "FastICA"
		
	def train(self, data):
		"""
		Train the FastICA on the whitened data
		
		:param data: whitened data, ready to use
		"""
		self.ica.fit(data)
	
	def encode(self, data):
		"""
		Encodes the ready to use data
		
		:returns: encoded data with dimension n_components
		"""
		return self.ica.transform(data)
	
	def decode(self, components):
		"""
		Decode the data to return whitened reconstructed data
		
		:returns: reconstructed data
		"""
		return self.ica.inverse_transform(components)
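A hedged usage sketch for the wrapper class above; the params values are illustrative assumptions, and `data` merely stands in for already-whitened input.

import numpy as np

ica = ICA({"n_components": 4, "random_state": 0})
data = np.random.RandomState(0).rand(500, 20)
ica.train(data)
codes = ica.encode(data)      # shape (500, 4)
recon = ica.decode(codes)     # back to shape (500, 20)
print(codes.shape, recon.shape)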
Example #6
    def RunICAScikit():
      totalTimer = Timer()

      # Load input dataset.
      data = np.genfromtxt(self.dataset, delimiter=',')

      opts = {}
      if "num_components" in options:
        opts["n_components"] = int(options.pop("num_components"))

      if "algorithm" in options:
        opts["algorithm"] = str(options.pop("algorithm"))
        if opts["algorithm"] not in ['parallel', 'deflation']:
          Log.Fatal("Invalid value for algorithm: "+ str(algorithm.group(1))+" .Must be either parallel or deflation")
          return -1

      if "function" in options:
        opts["fun"] = str(options.pop("function"))
        if opts["fun"] not in ['logcosh', 'exp', 'cube']:
          Log.Fatal("Invalid value for fun: "+ str(fun.group(1))+" .Must be either logcosh,exp or cube")
          return -1

      if "tolerance" in options:
        opts["tol"] = float(options.pop("tolerance"))

      try:
        # Perform ICA.
        with totalTimer:
          model = FastICA(**opts)
          ic = model.fit(data).transform(data)
      except Exception as e:
        return -1

      return totalTimer.ElapsedTime()
Example #7
 def wrapper_fastica(data, random_state=None):
     """Call FastICA implementation from scikit-learn."""
     ica = FastICA(random_state=random_state)
     ica.fit(cat_trials(data).T)
     u = ica.components_.T
     m = ica.mixing_.T
     return m, u
Example #8
    def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        scl = RobustScaler()
        X_train_scl = scl.fit_transform(X_train)
        X_test_scl = scl.transform(X_test)
        
        ##
        ## ICA
        ##
        ica = FastICA(n_components=X_train_scl.shape[1])
        X_ica = ica.fit_transform(X_train_scl)
        
        ##
        ## Plots
        ##
        ph = plot_helper()

        kurt = kurtosis(X_ica)
        print(kurt)
        
        title = 'Kurtosis (FastICA) for ' + data_set_name
        name = data_set_name.lower() + '_ica_kurt'
        filename = './' + self.out_dir + '/' + name + '.png'
        
        ph.plot_simple_bar(np.arange(1, len(kurt)+1, 1),
                           kurt,
                           np.arange(1, len(kurt)+1, 1).astype('str'),
                           'Feature Index',
                           'Kurtosis',
                           title,
                           filename)
Example #9
 def best_ica_nba(self):
     dh = data_helper()
     X_train, X_test, y_train, y_test = dh.get_nba_data()
     
     scl = RobustScaler()
     X_train_scl = scl.fit_transform(X_train)
     X_test_scl = scl.transform(X_test)
     
     ica = FastICA(n_components=X_train_scl.shape[1])
     X_train_transformed = ica.fit_transform(X_train_scl, y_train)
     X_test_transformed = ica.transform(X_test_scl)
     
     ## top 2
     kurt = kurtosis(X_train_transformed)
     i = kurt.argsort()[::-1]
     X_train_transformed_sorted = X_train_transformed[:, i]
     X_train_transformed = X_train_transformed_sorted[:,0:2]
     
     kurt = kurtosis(X_test_transformed)
     i = kurt.argsort()[::-1]
     X_test_transformed_sorted = X_test_transformed[:, i]
     X_test_transformed = X_test_transformed_sorted[:,0:2]
     
     # save
     filename = './' + self.save_dir + '/nba_ica_x_train.txt'
     pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)
     
     filename = './' + self.save_dir + '/nba_ica_x_test.txt'
     pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)
     
     filename = './' + self.save_dir + '/nba_ica_y_train.txt'
     pd.DataFrame(y_train).to_csv(filename, header=False, index=False)
     
     filename = './' + self.save_dir + '/nba_ica_y_test.txt'
     pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Example #10
    def filter_frames(self, data):
        logging.debug("I am starting the old componenty vous")
        data = data[0]
        print('The length of the data is ' + str(data.shape))
        sh = data.shape
        newshape = (np.prod(sh[:-1]), sh[-1])
        print "The shape of the data is:"+str(data.shape) + str(newshape)
        data = np.reshape(data, (newshape))
        # data will already be shaped correctly
        logging.debug("Making the matrix")
        ica = FastICA(n_components=self.parameters['number_of_components'],
                      algorithm='parallel',
                      whiten=self.parameters['whiten'],
                      w_init=self.parameters['w_init'],
                      random_state=self.parameters['random_state'])
        logging.debug("Performing the fit")
        data = self.remove_nan_inf(data)  #otherwise the fit flags up an error for obvious reasons
#         print "I'm here"
        S_ = ica.fit_transform(data)
#         print "S_Shape is:"+str(S_.shape)
#         print "self.images_shape:"+str(self.images_shape)
        scores = np.reshape(S_, (self.images_shape))
        eigenspectra = ica.components_
        logging.debug("mange-tout")
        return [scores, eigenspectra]
 def __create_image_obser(self, image_observations) :
     """
     Creation of a space in which the images will be compared (learning stage).
     Firstly PCA is applied in order to reduce the number of features in the
     images. Reduction is done so that 99% of measured variance is covered.
     
     After that, ICA is performed on the coefficients calculated by transforming
     (reducing) the face images with PCA. From the learned ICA components
     basis_images (vectors), original images coefficients and transformation
     for new incoming images are extracted.
     """
     pca = PCA()
     pca.fit(image_observations)
     sum = 0
     components_to_take = 0
     for ratio in pca.explained_variance_ratio_:
         components_to_take += 1
         sum += ratio
         if (sum > 0.99):
             break 
     print("PCA reduces the number of dimensions to: " + str(components_to_take))
     pca = PCA(whiten=True, n_components=components_to_take)
     self.__transformed_images = pca.fit_transform(image_observations)
     self.__transformed_images_mean = np.mean(self.__transformed_images, axis=0)
     self.__transformed_images -= self.__transformed_images_mean
     self.__pca = pca
     
     
     ica = FastICA(whiten=True, max_iter=100000)
     self.__original_images_repres = ica.fit_transform(self.__transformed_images)
     self.__basis_images = ica.mixing_.T
     self.__transformation = ica.components_
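A hedged sketch of a helper method that could sit in the same class, showing how the pieces stored above might be used to project a new image into the learned comparison space; the method name and the `new_image` argument are illustrative assumptions, not part of the original class.

 def project_new_image(self, new_image):
     # Hypothetical helper: PCA-reduce the raw pixels, center with the stored mean,
     # then apply the learned ICA transformation (ica.components_) to get the
     # representation used for comparison.
     coeffs = self.__pca.transform(new_image.reshape(1, -1))
     coeffs = coeffs - self.__transformed_images_mean
     return coeffs @ self.__transformation.T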
 def reduceDataset(self,nr=3,method='PCA'):
     '''It reduces the dimensionality of a given dataset using different techniques provided by Sklearn library
      Methods available:
                         'PCA'
                         'FactorAnalysis'
                         'KPCArbf','KPCApoly'
                         'KPCAcosine','KPCAsigmoid'
                         'IPCA'
                         'FastICADeflation'
                         'FastICAParallel'
                         'Isomap'
                         'LLE'
                         'LLEmodified'
                         'LLEltsa'
     '''
     dataset=self.ModelInputs['Dataset']
     #dataset=self.dataset[Model.in_columns]
     #dataset=self.dataset[['Humidity','TemperatureF','Sea Level PressureIn','PrecipitationIn','Dew PointF','Value']]
     #PCA
     if method=='PCA':
         sklearn_pca = sklearnPCA(n_components=nr)
         reduced = sklearn_pca.fit_transform(dataset)
     #Factor Analysis
     elif method=='FactorAnalysis':
         fa=FactorAnalysis(n_components=nr)
         reduced=fa.fit_transform(dataset)
     #kernel pca with rbf kernel
     elif method=='KPCArbf':
         kpca=KernelPCA(nr,kernel='rbf')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with poly kernel
     elif method=='KPCApoly':
         kpca=KernelPCA(nr,kernel='poly')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with cosine kernel
     elif method=='KPCAcosine':
         kpca=KernelPCA(nr,kernel='cosine')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with sigmoid kernel
     elif method=='KPCAsigmoid':
         kpca=KernelPCA(nr,kernel='sigmoid')
         reduced=kpca.fit_transform(dataset)
     #ICA
     elif method=='IPCA':
         ipca=IncrementalPCA(nr)
         reduced=ipca.fit_transform(dataset)
     #Fast ICA
     elif method=='FastICAParallel':
         fip=FastICA(nr,algorithm='parallel')
         reduced=fip.fit_transform(dataset)
     elif method=='FastICADeflation':
         fid=FastICA(nr,algorithm='deflation')
         reduced=fid.fit_transform(dataset)
     elif method == 'All':
         self.dimensionalityReduction(nr=nr)
         return self
     
     self.ModelInputs.update({method:reduced})
     self.datasetsAvailable.append(method)
     return self
Example #13
def ica(tx, ty, rx, ry):
    compressor = ICA(whiten=True)  # for some people, whiten needs to be off
    newtx = compressor.fit_transform(tx)
    newrx = compressor.fit_transform(rx)
    em(newtx, ty, newrx, ry, add="wICAtr", times=10)
    km(newtx, ty, newrx, ry, add="wICAtr", times=10)
    nn(newtx, ty, newrx, ry, add="wICAtr")
Example #14
def align(movie_data, options, args, lrh):
    print('pICA(scikit-learn)')
    nvoxel = movie_data.shape[0]
    nTR    = movie_data.shape[1]
    nsubjs = movie_data.shape[2]

    align_algo = args.align_algo
    nfeature   = args.nfeature
    randseed    = args.randseed
    if not os.path.exists(options['working_path']):
        os.makedirs(options['working_path'])

    # zscore the data
    bX = np.zeros((nsubjs*nvoxel,nTR))
    for m in range(nsubjs):
        bX[m*nvoxel:(m+1)*nvoxel,:] = stats.zscore(movie_data[:, :, m].T ,axis=0, ddof=1).T
    del movie_data
 
    np.random.seed(randseed)
    A = np.mat(np.random.random((nfeature,nfeature)))

    ica = FastICA(n_components= nfeature, max_iter=500,w_init=A,random_state=randseed)
    St = ica.fit_transform(bX.T)
    ES = St.T
    bW = ica.mixing_

    R = np.zeros((nvoxel,nfeature,nsubjs))
    for m in range(nsubjs):
        R[:,:,m] = bW[m*nvoxel:(m+1)*nvoxel,:]

    niter = 10  
    # initialization when first time run the algorithm
    np.savez_compressed(options['working_path']+align_algo+'_'+lrh+'_'+str(niter)+'.npz',\
                                R = R, G=ES.T, niter=niter)
    return niter
def main(mode):
    path = "/local/attale00/extracted_pascal__4__Multi-PIE"
    path_ea = path + "/color128/"

    allLabelFiles = utils.getAllFiles("/local/attale00/a_labels")

    labeledImages = [i[0:16] + ".png" for i in allLabelFiles]

    # labs=utils.parseLabelFiles(path+'/Multi-PIE/labels','mouth',labeledImages,cutoffSeq='.png',suffix='_face0.labels')
    labs = utils.parseLabelFiles(
        "/local/attale00/a_labels", "mouth", labeledImages, cutoffSeq=".png", suffix="_face0.labels"
    )

    testSet = fg.dataContainer(labs)
    roi = (50, 74, 96, 160)
    X = fg.getAllImagesFlat(path_ea, testSet.fileNames, (128, 256), roi=roi)

    # perform ICA
    if mode not in ["s", "v"]:
        ica = FastICA(n_components=100, whiten=True)
        ica.fit(X)
        meanI = np.mean(X, axis=0)
        X1 = X - meanI
        data = ica.transform(X1)
        filters = ica.components_

    elif mode in ["s", "v"]:
        W = np.load("/home/attale00/Desktop/classifiers/ica/filter1.npy")
        m = np.load("/home/attale00/Desktop/classifiers/ica/meanI1.npy")
        X1 = X - m
        data = np.dot(X1, W.T)

    for i in range(len(testSet.data)):
        testSet.data[i].extend(data[i, :])

    strel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))

    # fg.getHogFeature(testSet,roi,path=path_ea,ending='.png',extraMask = None,orientations = 3, cells_per_block=(6,2),maskFromAlpha=False)
    # fg.getColorHistogram(testSet,roi,path=path_ea,ending='.png',colorspace='lab',bins=10)
    testSet.targetNum = list(map(utils.mapMouthLabels2Two, testSet.target))

    rf = classifierUtils.standardRF(max_features=np.sqrt(len(testSet.data[0])), min_split=5, max_depth=40)
    if mode in ["s", "v"]:
        print "Classifying with loaded classifier"
        classifierUtils.classifyWithOld(
            path, testSet, mode, clfPath="/home/attale00/Desktop/classifiers/ica/rf128ICA_1"
        )
    elif mode in ["c"]:
        print "cross validation of data"
        print "Scores"
        # print classifierUtils.standardCrossvalidation(rf,testSet,n_jobs=5)
        # _cvDissect(testSet,rf)
        classifierUtils.dissectedCV(rf, testSet)
        print "----"

    elif mode in ["save"]:
        print "saving new classifier"
        _saveRF(testSet)
    else:
        print "not doing anything"
Example #16
def run_ica(data, comp):
    ica = FastICA(n_components=comp, whiten=True, max_iter=5000)
    data_out=np.zeros((comp,np.shape(data[0,:,0])[0],np.shape(data[0,0,:])[0]))
    for i in range(np.shape(data[0,:,0])[0]):
        print(i)
        data_out[:,i,:]=np.transpose(ica.fit_transform(np.transpose(data[:,i,:])))
    return data_out
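A minimal, hedged usage sketch for run_ica above; the channels x trials x time axis layout is an assumption inferred from how the function indexes `data`, and the module-level numpy/FastICA imports of the original file are assumed.

import numpy as np

data = np.random.RandomState(0).randn(8, 5, 200)   # 8 channels, 5 trials, 200 samples (assumed layout)
sources = run_ica(data, comp=3)                     # -> shape (3, 5, 200): 3 components per trial
print(sources.shape)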
Example #17
def dim_survey(X, entry_id):

    # convert to numpy
    X = np.array(X)

    # run the reduction.
    X_pca = PCA(n_components=3).fit_transform(X)
    X_tsne = TSNE(n_components=3).fit_transform(X)
    X_ica = FastICA(n_components=3).fit_transform(X)

    # connect to db.
    with mongoctx() as db:

        # update the stuff.
        db['entry'].update(
            {
                '_id': ObjectId(entry_id)
            },
            {
                '$set': {
                    'pca': X_pca.tolist(),
                    'tsne': X_tsne.tolist(),
                    'ica': X_ica.tolist(),
                }
            }
        )
def test_ica(eng):
    t = linspace(0, 10, 100)
    s1 = sin(t)
    s2 = square(sin(2*t))
    x = c_[s1, s2, s1+s2]
    random.seed(0)
    x += 0.001*random.randn(*x.shape)
    x = fromarray(x, engine=eng)

    def normalize_ICA(s, aT):
        a = aT.T
        c = a.sum(axis=0)
        return s*c, (a/c).T

    from sklearn.decomposition import FastICA
    ica = FastICA(n_components=2, fun='cube', random_state=0)
    s1 = ica.fit_transform(x.toarray())
    aT1 = ica.mixing_.T
    s1, aT1 = normalize_ICA(s1, aT1)

    s2, aT2 = ICA(k=2, svd_method='direct', max_iter=200, seed=0).fit(x)
    s2, aT2 = normalize_ICA(s2, aT2)
    tol=1e-1
    assert allclose_sign_permute(s1, s2, atol=tol)
    assert allclose_sign_permute(aT1, aT2, atol=tol)
Example #19
    def fit(self, x, y, i=0):
        # if gaussian processes are being used, data dimensionality needs to be reduced before fitting
        if self.method[i] == 'GP':
            if self.reduce_dim == 'FastICA':
                print('Reducing dimensionality with ICA')
                do_ica = FastICA(n_components=self.n_components)
                self.do_reduce_dim = do_ica.fit(x)
            if self.reduce_dim == 'PCA':
                print('Reducing dimensionality with PCA')
                do_pca = PCA(n_components=self.n_components)
                self.do_reduce_dim = do_pca.fit(x)

            x = self.do_reduce_dim.transform(x)
        #try:
            print('Training model...')
        try:
            self.model.fit(x, y)
            self.goodfit = True
            print(self.model)
        except:
            self.goodfit = False
            if self.method[i] == 'GP':
                print('Model failed to train! (For GP this does not always indicate a problem, especially for low numbers of components.)')
                pass
            else:
                print('Model failed to train!')
                traceback.print_stack()

        if self.ransac:
            self.outliers = np.logical_not(self.model.inlier_mask_)
            print(str(np.sum(self.outliers)) + ' outliers removed with RANSAC')
Example #20
    def ica(self, n_components=None, sources='left'):
        """Return result from independent component analysis.

        X = SA + m

        Sklearn's FastICA implementation is used.

        When sources=left the sources are returned in the first (left) matrix
        and the mixing matrix is returned in the second (right) matrix,
        corresponding to X = SA.

        When sources=right the sources are returned in the second matrix while
        the mixing matrix is returned in the first, corresponding to X = AS.

        Parameters
        ----------
        n_components : int, optional
            Number of ICA components.
        sources : left or right, optional
            Indicates whether the sources should be the left or right matrix.

        Returns
        -------
        first : Matrix
            Estimated source matrix (S) if sources=left.
        second : Matrix
            Estimated mixing matrix (A) if sources=right.
        mean_vector : brede.core.vector.Vector
            Estimated mean vector

        References
        ----------
        http://scikit-learn.org/stable/modules/decomposition.html#ica

        """
        if n_components is None:
            min_shape = min(self.shape[0], len(self._eeg_columns))
            n_components = int(np.ceil(sqrt(float(min_shape) / 2)))

        ica = FastICA(n_components=n_components)

        if sources == 'left':
            sources = Matrix(ica.fit_transform(
                self.ix[:, self._eeg_columns].values),
                index=self.index)
            mixing_matrix = Matrix(ica.mixing_.T, columns=self._eeg_columns)
            mean_vector = Vector(ica.mean_, index=self._eeg_columns)
            return sources, mixing_matrix, mean_vector

        elif sources == 'right':
            sources = Matrix(ica.fit_transform(
                self.ix[:, self._eeg_columns].values.T).T,
                columns=self._eeg_columns)
            mixing_matrix = Matrix(ica.mixing_, index=self.index)
            mean_vector = Vector(ica.mean_, index=self.index)
            return mixing_matrix, sources, mean_vector

        else:
            raise ValueError('Wrong argument to "sources"')
 def dimensionalityReduction(self,nr=5):
     '''It applies all the dimensionality reduction techniques available in this class:
     Techniques available:
                         'PCA'
                         'FactorAnalysis'
                         'KPCArbf','KPCApoly'
                         'KPCAcosine','KPCAsigmoid'
                         'IPCA'
                         'FastICADeflation'
                         'FastICAParallel'
                         'Isomap'
                         'LLE'
                         'LLEmodified'
                         'LLEltsa'
     '''
     dataset=self.ModelInputs['Dataset']
     sklearn_pca = sklearnPCA(n_components=nr)
     p_components = sklearn_pca.fit_transform(dataset)
     fa=FactorAnalysis(n_components=nr)
     factors=fa.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='rbf')
     rbf=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='poly')
     poly=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='cosine')
     cosine=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='sigmoid')
     sigmoid=kpca.fit_transform(dataset)
     ipca=IncrementalPCA(nr)
     i_components=ipca.fit_transform(dataset)
     fip=FastICA(nr,algorithm='parallel')
     fid=FastICA(nr,algorithm='deflation')
     ficaD=fid.fit_transform(dataset)
     ficaP=fip.fit_transform(dataset)
     '''isomap=Isomap(n_components=nr).fit_transform(dataset)
     try:
         lle1=LocallyLinearEmbedding(n_components=nr).fit_transform(dataset)
     except ValueError:
         lle1=LocallyLinearEmbedding(n_components=nr,eigen_solver='dense').fit_transform(dataset)
     try:
         
         lle2=LocallyLinearEmbedding(n_components=nr,method='modified').fit_transform(dataset)
     except ValueError:
         lle2=LocallyLinearEmbedding(n_components=nr,method='modified',eigen_solver='dense').fit_transform(dataset) 
     try:
         lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa').fit_transform(dataset)
     except ValueError:
         lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa',eigen_solver='dense').fit_transform(dataset)'''
     values=[p_components,factors,rbf,poly,cosine,sigmoid,i_components,ficaD,ficaP]#,isomap,lle1,lle2,lle3]
     keys=['PCA','FactorAnalysis','KPCArbf','KPCApoly','KPCAcosine','KPCAsigmoid','IPCA','FastICADeflation','FastICAParallel']#,'Isomap','LLE','LLEmodified','LLEltsa']
     self.ModelInputs.update(dict(zip(keys, values)))
     [self.datasetsAvailable.append(key) for key in keys ]
     
     #debug
     #dataset=pd.DataFrame(self.ModelInputs['Dataset'])
     #dataset['Output']=self.ModelOutput
     #self.debug['Dimensionalityreduction']=dataset
     ###
     return self
Example #22
def transform(data, n_components=3):
    features, weights, labels = data
    start = time()
    ica = FastICA(n_components=n_components)
    transformed = ica.fit_transform(features)
    elapsed = time() - start
    df = pd.DataFrame(transformed)
    return df, elapsed
Example #23
def wrapper_fastica(data):
    """ Call FastICA implementation from scikit-learn.
    """
    ica = FastICA()
    ica.fit(datatools.cat_trials(data))
    u = ica.components_.T
    m = ica.mixing_.T
    return m, u
Example #24
def independent_component(x, y):
    clf = FastICA(random_state=1)
    transformed = clf.fit_transform(x.reshape(-1, 1))
    comp = clf.components_[0, 0]
    mm = clf.mixing_[0, 0]
    src_max = transformed.max()
    src_min = transformed.min()
    return [comp, mm, src_max, src_min]
Example #25
File: lazy.py Project: ctw/eeglcf
def fastica(eeg_data):
    """
    Sample function to apply `FastICA`_ to the EEG data.

    Parameters
    ----------
    eeg_data : array
        EEG data in a CxTxE array. With C the number of channels, T the number
        of time samples and E the number of events.

    Returns
    -------
    ica : ICA object
        Trained `FastICA`_ object.
    ica_data : array
        EEG projected data in a CxTxE array. With C the number of components, T
        the number of time samples and E the number of events.
    """

    # Dimension shapes
    ch_len = eeg_data.shape[ch_dim]
    t_len = eeg_data.shape[t_dim]
    ev_len = eeg_data.shape[ev_dim]

    # -------------------------------------------------------------------------
    # 1. Fit the FastICA model

    # We need to collapse time and events dimensions
    coll_data = eeg_data.transpose([t_dim, ev_dim, ch_dim])\
        .reshape([t_len*ev_len, ch_len])

    # Fit model
    ica = FastICA()
    ica.fit(coll_data)

    # Normalize ICs to unit norm
    k = np.linalg.norm(ica.mixing_, axis=0)  # Frobenius norm
    ica.mixing_ /= k
    ica.components_[:] = (ica.components_.T * k).T

    # -------------------------------------------------------------------------
    # 2. Transform data

    # Project data
    bss_data = ica.transform(coll_data)

    # Adjust shape and dimensions back to "eeg_data" shape
    ic_len = bss_data.shape[1]
    bss_data = np.reshape(bss_data, [ev_len, t_len, ic_len])
    new_order = [0, 0, 0]
    # TODO: Check the following order
    new_order[ev_dim] = 0
    new_order[ch_dim] = 2
    new_order[t_dim] = 1
    bss_data = bss_data.transpose(new_order)

    # End
    return ica, bss_data
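A hedged usage sketch for fastica() above. ch_dim / t_dim / ev_dim are module-level constants in the original project; the values below (channels, time, events) are assumptions made only so the example is self-contained.

import numpy as np
from sklearn.decomposition import FastICA

ch_dim, t_dim, ev_dim = 0, 1, 2
eeg_data = np.random.RandomState(0).randn(16, 256, 12)   # C x T x E
ica, ica_data = fastica(eeg_data)
print(ica_data.shape)   # components x time x events, matching the input layout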
    def _fit_local(self, data):


        from sklearn.decomposition import FastICA
        from numpy import random
        random.seed(self.seed)
        model = FastICA(n_components=self.k, fun="cube", max_iter=self.max_iter, tol=self.tol, random_state=self.seed)
        signals = model.fit_transform(data)
        return signals, model.mixing_.T
Example #27
 def ica(self):
     '''
     Perform ICA on the data
         source_matrix -- rows are sources, columns are time points, values are ?
         mixing_matrix -- rows are electrodes, columns are source, values are contributions of the electrode to the source
     '''
     ica = FastICA(self.number_of_sources)
     ica.fit(self.data)
     self.mixing_matrix = ica.mixing_  # estimated mixing matrix
Example #28
    def generate_peoples_results_files(self):

        self.np_result = np.c_[self.results[0]['blue'], self.results[0]['green'], self.results[0]['red']]
        list_number = len(self.results[0]['blue'])

        #  ICA
        ica = FastICA(n_components=3, fun='logcosh', max_iter=2000)
        ica_transformed = ica.fit_transform(self.np_result)
        component_all = ica_transformed.ravel(order='F')  # column-major flatten: one component after another
        component_1 = component_all[:list_number]
        component_2 = component_all[list_number:(2 * list_number)]
        component_3 = component_all[(2 * list_number):(3 * list_number)]

        #  butter_smooth
        N = 8
        Wn = [1.6 / 30, 4.0 / 30]
        t = np.linspace(1 / 30, list_number / 30, list_number)
        b, a = signal.butter(N, Wn, 'bandpass', analog=False)
        filter_1 = signal.filtfilt(b, a, component_1)
        filter_2 = signal.filtfilt(b, a, component_2)
        filter_3 = signal.filtfilt(b, a, component_3)
        lowess_1 = sm.nonparametric.lowess(filter_1, t, frac=10.0 / list_number)
        lowess_2 = sm.nonparametric.lowess(filter_2, t, frac=10.0 / list_number)
        lowess_3 = sm.nonparametric.lowess(filter_3, t, frac=10.0 / list_number)

        smooths = []
        smooth_1 = lowess_1[:, 1]
        smooth_2 = lowess_2[:, 1]
        smooth_3 = lowess_3[:, 1]
        smooths.append(smooth_1)
        smooths.append(smooth_2)
        smooths.append(smooth_3)

        # FFT and spectrum
        fft_1 = np.fft.fft(smooth_1, 256)
        fft_2 = np.fft.fft(smooth_2, 256)
        fft_3 = np.fft.fft(smooth_3, 256)
        spectrum_1 = list(np.abs(fft_1) ** 2)
        spectrum_2 = list(np.abs(fft_2) ** 2)
        spectrum_3 = list(np.abs(fft_3) ** 2)
        max1 = max(spectrum_1)
        max2 = max(spectrum_2)
        max3 = max(spectrum_3)
        num_spec1 = spectrum_1.index(max(spectrum_1))
        if num_spec1 > (list_number / 2):
            num_spec1 = 256 - num_spec1
        num_spec2 = spectrum_2.index(max(spectrum_2))
        if num_spec2 > (list_number / 2):
            num_spec2 = 256 - num_spec2
        num_spec3 = spectrum_3.index(max(spectrum_3))
        if num_spec3 > (list_number / 2):
            num_spec3 = 256 - num_spec3
        num_spec = [num_spec1, num_spec2, num_spec3]
        max_all = [max1, max2, max3]
        max_num = max_all.index(max(max_all))
        self.heartRate = int(num_spec[max_num] * 1800 / 256) + 1
        return smooths[max_num]
def fit_transform_ica(X):
    ica = FastICA(n_components=50, max_iter=2000, tol=0.05, algorithm='parallel', fun='cube', fun_args={'alpha': 1.0}, random_state=42) #26 36 76
    start = time.time()
    X = ica.fit_transform(X)
    end = time.time()
    
    print "Done!\nFit ICA transform time (secs): {:.3f}".format(end - start)

    return X, ica
def independent_component(x, y):
    clf = FastICA(random_state=1)
    # fit_transform returns the estimated sources; mixing_ replaces the removed
    # get_mixing_matrix()/sources_ API of very old scikit-learn versions
    sources = clf.fit_transform(x.reshape(-1, 1)).flatten()
    comp = clf.components_[0][0]
    mm = clf.mixing_[0][0]
    src_max = max(sources)
    src_min = min(sources)
    return [comp, mm, src_max, src_min]
Example #31
train, test = train[train_features], test[train_features]
print("\nTrain shape: {}\nTest shape: {}".format(train.shape, test.shape))

print("\nStart decomposition process...")
print("PCA")
pca = PCA(n_components=N_COMP, random_state=17)
pca_results_train = pca.fit_transform(train)
pca_results_test = pca.transform(test)

print("tSVD")
tsvd = TruncatedSVD(n_components=N_COMP, random_state=17)
tsvd_results_train = tsvd.fit_transform(train)
tsvd_results_test = tsvd.transform(test)

print("ICA")
ica = FastICA(n_components=N_COMP, random_state=17)
ica_results_train = ica.fit_transform(train)
ica_results_test = ica.transform(test)

print("GRP")
grp = GaussianRandomProjection(n_components=N_COMP, eps=0.1, random_state=17)
grp_results_train = grp.fit_transform(train)
grp_results_test = grp.transform(test)

print("SRP")
srp = SparseRandomProjection(n_components=N_COMP,
                             dense_output=True,
                             random_state=17)
srp_results_train = srp.fit_transform(train)
srp_results_test = srp.transform(test)
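The decomposition outputs above are commonly appended back onto the train/test frames as extra feature columns; a hedged sketch of that step (the column-name prefixes are an assumption for illustration).

for i in range(1, N_COMP + 1):
    train['pca_' + str(i)] = pca_results_train[:, i - 1]
    test['pca_' + str(i)] = pca_results_test[:, i - 1]
    train['ica_' + str(i)] = ica_results_train[:, i - 1]
    test['ica_' + str(i)] = ica_results_test[:, i - 1]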
Example #32
from sklearn.decomposition import FastICA
import numpy
import string
import scipy

data = numpy.genfromtxt(open("modifiedLetter2.data"), delimiter=",")
print(data.shape)
X = numpy.delete(data, -1, 1)
print(X.shape)
ica = FastICA()
ica_sources = ica.fit_transform(X)
print(scipy.stats.kurtosis(ica_sources))
Example #33
 def perform_fastICA(self):
     self.dm = FastICA(n_components=self.n_components)
     self.components = self.dm.fit_transform(self.dataset.transpose())
Example #34
def calculateCovariance(X):
    meanX = np.mean(X, axis=0)
    lenX = X.shape[0]
    X = X - meanX
    covariance = X.T.dot(X) / lenX
    return covariance


P = calculateCovariance(data)
print(P)
print(P.shape)

# eigenvalue decomposition
W, V = np.linalg.eig(P)
D = np.diag(W)

print(V @ D @ V.T - P)

# apply whitening ???

A = data.dot(V[1])
X = A / np.sqrt(W[1] + 1e-5)

print(X)

X = X.reshape(-1, 1)

transformer = FastICA()
X_transformed = transformer.fit_transform(X)
print(X_transformed.shape)
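The "# apply whitening ???" step above is uncertain; below is a hedged sketch of one conventional PCA-whitening recipe built from the same covariance, using eigh (the symmetric-matrix solver) instead of eig. It is a standard construction, not necessarily what the original author intended.

import numpy as np

X_centered = data - np.mean(data, axis=0)
W, V = np.linalg.eigh(calculateCovariance(data))           # eigh is the robust choice for a symmetric matrix
X_white = X_centered @ V @ np.diag(1.0 / np.sqrt(W + 1e-5))
print(np.cov(X_white, rowvar=False).round(2))              # should be close to the identity matrix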
Example #35
                           plot=True,
                           targetcluster=3,
                           stats=True)  #kmeans.run()
#kmeans.run()
em = ExpectationMaximizationTestCluster(X_train,
                                        y_train,
                                        clusters=range(1, 31),
                                        plot=True,
                                        targetcluster=3,
                                        stats=True)
#em.run()

pca = PCA(n_components=3)
S_pca = pca.fit_transform(features)

ica = FastICA(n_components=3)
S_ica = ica.fit_transform(features)

rpg = random_projection.GaussianRandomProjection(n_components=3)
g_rpg = rpg.fit_transform(features)

spg = random_projection.SparseRandomProjection(n_components=3)
s_rp = spg.fit_transform(features)

threshold = [
    .01, .02, .03, .04, .05, .1, .20, .25, .30, .4, .5, .6, .7, .8, .9, 1
]

lvf = VarianceThreshold()
t_lvf = lvf.fit_transform(X_train)
Example #36
    plt.title('The Elbow Method showing the optimal k')
    plt.show()





X = load_wine().data
y = load_wine().target


scaler = StandardScaler()
scaler.fit(X)

X = scaler.transform(X)
ica = ICA(n_components=2)
ica.fit(X)

dr_X = ica.transform(X)

#obtain elbow plot
plot_elbow(dr_X)

#pick three clusters, and view a few groupings

km = KMeans(n_clusters=3,random_state=0).fit(dr_X)

labels = km.predict(dr_X)

print(silhouette_score(dr_X, labels))
#print(ImagesTable[1])

 # my code

    
new_data = list()
for _, x in ImagesTable.iterrows(): # (3000,)
    data = x['image'].reshape(1440,1)
    new_data.append(data)

# ICA Processing 
print("next Pictures are proccesed with ICA ")

stack_new_data = np.hstack(new_data)
ica = FastICA(n_components=10)
S_ica_ = ica.fit_transform(stack_new_data)  # Reconstruct signals
A_ica_ = ica.mixing_  # Get estimated mixing matrix
abs_value_data = np.absolute(S_ica_)

final_data = [abs_value_data[:,y].reshape(12,120) for y in range(10)]
for y in range(10):
    plt.matshow(final_data[y])   
    plt.show()    
    
print("next Pictures are proccesed with PCA ")

#PCA Data Processing 

pca = PCA(n_components=10)
PCA_data = pca.fit_transform(stack_new_data)


# PCA

pca = PCA(n_components=n_comp, random_state=42)

pca2_results_train = pca.fit_transform(train.drop(["y"], axis=1))

pca2_results_test = pca.transform(test)



# ICA

ica = FastICA(n_components=n_comp, random_state=42)

ica2_results_train = ica.fit_transform(train.drop(["y"], axis=1))

ica2_results_test = ica.transform(test)



# Append decomposition components to datasets

for i in range(1, n_comp+1):

    train['pca_' + str(i)] = pca2_results_train[:,i-1]

    test['pca_' + str(i)] = pca2_results_test[:, i-1]
Example #39
def cluster_activations(separated_activations,
                        nb_clusters=2,
                        nb_dims=10,
                        reduce='FastICA',
                        clustering_method='KMeans'):
    """
    Clusters activations and returns two arrays.
    1) separated_clusters: where separated_clusters[i] is a 1D array indicating which cluster each datapoint
    in the class has been assigned
    2) separated_reduced_activations: activations with dimensionality reduced using the specified reduce method

    :param separated_activations: list where separated_activations[i] is a np matrix for the ith class where
    each row corresponds to activations for a given data point
    :type separated_activations: `list`
    :param nb_clusters: number of clusters (defaults to 2 for poison/clean)
    :type nb_clusters: `int`
    :param nb_dims: number of dimensions to reduce activation to via PCA
    :type nb_dims: `int`
    :param reduce: Method to perform dimensionality reduction, default is FastICA
    :type reduce: `str`
    :param clustering_method: Clustering method to use, default is KMeans
    :type clustering_method: `str`
    :return: separated_clusters, separated_reduced_activations
    :rtype: `tuple`
    """
    from sklearn.cluster import KMeans
    from sklearn.decomposition import FastICA, PCA

    separated_clusters = []
    separated_reduced_activations = []

    if reduce == 'FastICA':
        projector = FastICA(n_components=nb_dims, max_iter=1000, tol=0.005)
    elif reduce == 'PCA':
        projector = PCA(n_components=nb_dims)
    else:
        raise ValueError(reduce +
                         " dimensionality reduction method not supported.")

    if clustering_method == 'KMeans':
        clusterer = KMeans(n_clusters=nb_clusters)
    else:
        raise ValueError(clustering_method +
                         " clustering method not supported.")

    for i, ac in enumerate(separated_activations):
        # Apply dimensionality reduction
        nb_activations = np.shape(ac)[1]
        if nb_activations > nb_dims:
            reduced_activations = projector.fit_transform(ac)
        else:
            logger.info(
                "Dimensionality of activations = %i less than nb_dims = %i. Not applying dimensionality "
                "reduction.", nb_activations, nb_dims)
            reduced_activations = ac
        separated_reduced_activations.append(reduced_activations)

        # Get cluster assignments
        clusters = clusterer.fit_predict(reduced_activations)
        separated_clusters.append(clusters)

    return separated_clusters, separated_reduced_activations
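A hedged usage sketch for cluster_activations above, with two classes of random activations; the shapes are illustrative assumptions, and numpy is assumed imported as np at module level as in the original project.

import numpy as np

rng = np.random.RandomState(0)
separated_activations = [rng.rand(300, 64), rng.rand(250, 64)]   # per-class activation matrices
clusters, reduced = cluster_activations(separated_activations, nb_clusters=2, nb_dims=10)
print([c.shape for c in clusters], [r.shape for r in reduced])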
Example #40
def decompose(values,
              other_value_sets={},
              centroids={},
              method=None,
              number_of_components=None,
              random=False):

    if method is None:
        method = defaults["decomposition_method"]
    method = proper_string(normalise_string(method),
                           DECOMPOSITION_METHOD_NAMES)

    if number_of_components is None:
        number_of_components = defaults["decomposition_dimensionality"]

    other_values_provided_as_dictionary = True
    if other_value_sets is not None and not isinstance(other_value_sets, dict):
        other_value_sets["unknown"] = other_value_sets
        other_values_provided_as_dictionary = False

    if random:
        random_state = None
    else:
        random_state = 42

    if method == "PCA":
        if (values.shape[1] <= MAXIMUM_FEATURE_SIZE_FOR_NORMAL_PCA
                and not scipy.sparse.issparse(values)):
            model = PCA(n_components=number_of_components)
        else:
            model = IncrementalPCA(n_components=number_of_components,
                                   batch_size=100)
    elif method == "SVD":
        model = TruncatedSVD(n_components=number_of_components)
    elif method == "ICA":
        model = FastICA(n_components=number_of_components)
    elif method == "t-SNE":
        if number_of_components < 4:
            tsne_method = "barnes_hut"
        else:
            tsne_method = "exact"
        model = TSNE(n_components=number_of_components,
                     method=tsne_method,
                     random_state=random_state)
    else:
        raise ValueError("Method `{}` not found.".format(method))

    values_decomposed = model.fit_transform(values)

    if other_value_sets and method != "t_sne":
        other_value_sets_decomposed = {}
        for other_set_name, other_values in other_value_sets.items():
            if other_values is not None:
                other_value_decomposed = model.transform(other_values)
            else:
                other_value_decomposed = None
            other_value_sets_decomposed[other_set_name] = (
                other_value_decomposed)
    else:
        other_value_sets_decomposed = None

    if other_value_sets_decomposed and not other_values_provided_as_dictionary:
        other_value_sets_decomposed = other_value_sets_decomposed["unknown"]

    # Only supports centroids without data sets as top levels
    if centroids is not None and method == "PCA":
        if "means" in centroids:
            centroids = {"unknown": centroids}
        components = model.components_
        centroids_decomposed = {}
        for distribution, distribution_centroids in centroids.items():
            if distribution_centroids:
                centroids_distribution_decomposed = {}
                for parameter, parameter_values in (
                        distribution_centroids.items()):
                    if parameter == "means":
                        shape = numpy.array(parameter_values.shape)
                        original_dimension = shape[-1]
                        reshaped_parameter_values = parameter_values.reshape(
                            -1, original_dimension)
                        decomposed_parameter_values = model.transform(
                            reshaped_parameter_values)
                        shape[-1] = number_of_components
                        new_parameter_values = (
                            decomposed_parameter_values.reshape(shape))
                    elif parameter == "covariance_matrices":
                        shape = numpy.array(parameter_values.shape)
                        original_dimension = shape[-1]
                        reshaped_parameter_values = parameter_values.reshape(
                            -1, original_dimension, original_dimension)
                        n_centroids = reshaped_parameter_values.shape[0]
                        decomposed_parameter_values = numpy.empty(
                            shape=(n_centroids, 2, 2))
                        for i in range(n_centroids):
                            decomposed_parameter_values[i] = (
                                components @ reshaped_parameter_values[i]
                                @ components.T)
                        shape[-2:] = number_of_components
                        new_parameter_values = (
                            decomposed_parameter_values.reshape(shape))
                    else:
                        new_parameter_values = parameter_values
                    centroids_distribution_decomposed[parameter] = (
                        new_parameter_values)
                centroids_decomposed[distribution] = (
                    centroids_distribution_decomposed)
            else:
                centroids_decomposed[distribution] = None
        if "unknown" in centroids_decomposed:
            centroids_decomposed = centroids_decomposed["unknown"]
    else:
        centroids_decomposed = None

    output = [values_decomposed]

    if other_value_sets != {}:
        output.append(other_value_sets_decomposed)

    if centroids != {}:
        output.append(centroids_decomposed)

    return output
Example #41
        yj = np.abs(np.corrcoef(train[j], train['y'])[0, 1])
        yij = np.abs(np.corrcoef(train[j] + train[i], train['y'])[0, 1])
        if yij > yi + yj:
            print(i + '_' + j + ': ' + str(yi) + ' ' + str(yj) + ' ' +
                  str(yij))
            interact_feat['Itr_' + i + '_' + j] = all_dt[j] + all_dt[i]
interact_feat = feat_standardize(interact_feat)

# 2. PCA, ICA & SVD
n_comp = 12
random_seed = 624
# 2.1 PCA
pca = PCA(n_components=n_comp, random_state=random_seed)
pca_feat = pca.fit_transform(all_dt.drop(['y'], axis=1))
# 2.2 ICA
ica = FastICA(n_components=n_comp, random_state=random_seed)
ica_feat = ica.fit_transform(all_dt.drop(['y'], axis=1))
# 2.3 SVD
svd = TruncatedSVD(n_components=n_comp, random_state=random_seed)
svd_feat = svd.fit_transform(all_dt.drop(['y'], axis=1))
# 2.4 TSNE
tsne = TSNE(n_components=3, random_state=random_seed, verbose=1)
tsne_feat = tsne.fit_transform(all_dt.drop(['y'], axis=1))
# 2.5 KMeans
kmeans = KMeans(n_clusters=4, random_state=random_seed)
kmeans_feat = kmeans.fit_transform(all_dt.drop(['y'], axis=1))
# 2.6 Logistic PCA

# 3. Random Projection
# 3.1 SRP
srp = SparseRandomProjection(n_components=n_comp,
Example #42
    cached = True
elif args.nmf and os.path.exists(f"cache/nmf-{args.n_components}.pkl"):
    with open(f"cache/nmf-{args.n_components}.pkl", "rb") as f:
        sklearn_transformer = pickle.load(f)
    cached = True
elif args.truncatedsvd and os.path.exists(
        f"cache/truncatedsvd-{args.n_components}.pkl"):
    with open(f"cache/truncatedsvd-{args.n_components}.pkl", "rb") as f:
        sklearn_transformer = pickle.load(f)
    cached = True
elif args.pca:
    sklearn_transformer = PCA(n_components=args.n_components,
                              svd_solver="full",
                              random_state=1234)
elif args.fastica:
    sklearn_transformer = FastICA(n_components=args.n_components,
                                  random_state=1234)
elif args.incrementalpca:
    sklearn_transformer = IncrementalPCA(n_components=args.n_components)
elif args.kernelpca:
    sklearn_transformer = KernelPCA(n_components=args.n_components,
                                    random_state=1234,
                                    n_jobs=-1)
elif args.nmf:
    sklearn_transformer = NMF(n_components=args.n_components,
                              random_state=1234)
elif args.truncatedsvd:
    sklearn_transformer = TruncatedSVD(n_components=args.n_components,
                                       random_state=1234)

if not cached:
    print(f"Fitting {sklearn_transformer.__class__.__name__}...", end="")
Example #43
def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10):
    """
    Perform ICA on `data` and returns mixing matrix

    Parameters
    ----------
    data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data, where `S` is
        samples and `T` is time
    n_components : :obj:`int`
        Number of components retained from PCA decomposition
    fixed_seed : :obj:`int`
        Seed for ensuring reproducibility of ICA results
    maxit : :obj:`int`, optional
        Maximum number of iterations for ICA. Default is 500.
    maxrestart : :obj:`int`, optional
        Maximum number of attempted decompositions to perform with different
        random seeds. ICA will stop running if there is convergence prior to
        reaching this limit. Default is 10.

    Returns
    -------
    mmix : (T x C) :obj:`numpy.ndarray`
        Z-scored mixing matrix for converting input data to component space,
        where `C` is components and `T` is the same as in `data`

    Notes
    -----
    Uses `sklearn` implementation of FastICA for decomposition
    """

    warnings.filterwarnings(action='ignore',
                            module='scipy',
                            message='^internal gelsd')

    if fixed_seed == -1:
        fixed_seed = np.random.randint(low=1, high=1000)

    for i_attempt in range(maxrestart):
        ica = FastICA(n_components=n_components,
                      algorithm='parallel',
                      fun='logcosh',
                      max_iter=maxit,
                      random_state=fixed_seed)

        with warnings.catch_warnings(record=True) as w:
            # Cause all warnings to always be triggered in order to capture
            # convergence failures.
            warnings.simplefilter('always')

            ica.fit(data)

            w = list(filter(lambda i: issubclass(i.category, UserWarning), w))
            if len(w):
                LGR.warning('ICA attempt {0} failed to converge after {1} '
                            'iterations'.format(i_attempt + 1, ica.n_iter_))
                if i_attempt < maxrestart - 1:
                    fixed_seed += 1
                    LGR.warning(
                        'Random seed updated to {0}'.format(fixed_seed))
            else:
                LGR.info('ICA attempt {0} converged in {1} '
                         'iterations'.format(i_attempt + 1, ica.n_iter_))
                break

    mmix = ica.mixing_
    mmix = stats.zscore(mmix, axis=0)
    return mmix
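A hedged usage sketch for tedica() above. LGR, warnings, stats (scipy.stats) and FastICA are module-level imports in the original project; LGR is stubbed here with the standard logging module purely as an assumption.

import logging
import numpy as np

LGR = logging.getLogger(__name__)
data = np.random.RandomState(42).randn(1000, 50)      # S samples x T time points
mmix = tedica(data, n_components=5, fixed_seed=42)
print(mmix.shape)                                      # (T, C) z-scored mixing matrix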
# Start timer
start_time = time.time()

# Load the data
from income_data import X, y, X_train, X_test, y_train, y_test

# Scale the data
scaler = StandardScaler()
scaler.fit(X)
X_train_std = scaler.transform(X)
X_test_std = scaler.transform(X)
X_toCluster = X_train_std
y_inputs = y

# Reduce Dimensionality (ICA)
projection = ProjectionAlgorithm(n_components=29)
X_toCluster = projection.fit_transform(X_toCluster)

######
# Run em clustering with 2 clusters and plot
######
cluster = GaussianMixture(random_state=0, n_components=2).fit(X_toCluster)
cluster_labels = cluster.predict(X_toCluster)

X_transformed = np.dot(X_toCluster, np.transpose(cluster.means_))

# print diagnostics
print('X_toCluster.shape \n', X_toCluster.shape)
print('X_transformed.shape \n', X_transformed.shape)
print('Labels \n', cluster_labels)
print('Weights \n', cluster.weights_)
Example #45
    pca.fit(r)
    X = pca.transform(r)
    np.savetxt(
        r'C:\Users\justjo\PycharmProjects\SaS_clustering\tensorboard\SaS_2020-03-04-22-10-42\PCA_embeds.csv',
        X,
        delimiter=',')

from sklearn.neighbors import NearestNeighbors
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

from sklearn.decomposition import FastICA
transformer = FastICA(n_components=32, random_state=0)
X_transformed = transformer.fit_transform(embed_simclr)

embeds = np.array(np.load(r'J:\SaS\embeds.npy', allow_pickle=True))
# embeds_std = np.array([x.reshape(-1,32).std(axis=0) for x in embeds])
embeds = np.array([x.reshape(-1, 32).mean(axis=0) for x in embeds])
embeds = embeds[:, embeds.std(axis=0) > 0]
# embeds_std = embeds_std[:, embeds_std.std(axis=0)>0]
imgs_raw = np.load(r'J:\SaS\imgs_raw_coded_png_bytes.npy')
# embeds_stacked = np.hstack((embeds, embeds_std))
nbrs = NearestNeighbors(n_neighbors=100, algorithm='ball_tree').fit(embeds)
# nbrs_std = NearestNeighbors(n_neighbors=100, algorithm='ball_tree').fit(embeds_std)
# nbrs_stacked = NearestNeighbors(n_neighbors=100, algorithm='ball_tree').fit(embeds_stacked)
_, indices = nbrs.kneighbors(embeds[8856].reshape(1, -1), 1000)
# _, indices_std = nbrs_std.kneighbors(embeds_std[194485].reshape(1,-1), 1000)
# _, indices_stacked = nbrs_std.kneighbors(embeds_std[200000].reshape(1,-1), 1000)
def ica_model(train_x,components):
    model = FastICA(n_components=components).fit(train_x)
    return model
Example #47
    fica_input         = pca_out.copy()              #(Nv,Nt)
    #####fica_input   /= fica_input.std(axis=0)   #(Nv,Nt)   #NOT SURE IF NEEDED OR NOT

    if options.reuse and os.path.exists(ica_mix_path) and os.path.exists(ica_mix_zsc_path) and os.path.exists(ica_mix_fft_path) and os.path.exists(ica_mix_freqs_path) and os.path.exists(ica_maps_path) and os.path.exists(ica_octs_path):
       print(" +              Loading pre-existing ICA Mixing matrix [%s]." % (ica_mix_path))
       print(" +              Loading pre-existing Normalized ICA Mixing matrix [%s]." % (ica_mix_zsc_path))
       print(" +              Loading pre-existing ICA Maps [%s]." % (ica_maps_path))
       fica_mmix         = np.loadtxt(ica_mix_path).T
       fica_mmix_zsc     = np.loadtxt(ica_mix_zsc_path).T
       octs_afterICA,_,_ = meu.niiLoad(ica_octs_path)
       octs_afterICA     = octs_afterICA[mask,:]
       fica_out,_,_      = meu.niiLoad(ica_maps_path)
       fica_out          = fica_out[mask,:]
    else:
       print(" +              Perform ICA....")
       fica       = FastICA(n_components=Nc, max_iter=500)
       fica_out = fica.fit_transform(fica_input).T #Original did not have the .T
       fica_out -= fica_out.mean(axis=0)
       fica_out /= fica_out.std(axis=0)
       fica_out  = fica_out.T

       fica_mmix  = fica.mixing_.T
       # Correct the sign of components
       # ------------------------------
       print(" +              Correct the sign of the ICA components....")
       fica_signs          = skew(np.reshape(fica_out,(Nv,Nc)),axis=0) #(Nc,)
       fica_signs         /= np.abs(fica_signs)                        #(Nc,)
       fica_out            = (fica_out.T*fica_signs[:,np.newaxis]).T   #(Nv,Nc)
       fica_mmix           = (fica_mmix*fica_signs[:,np.newaxis])      #(Nc,Nt)
       fica_mmix_zsc       = zscore(fica_mmix,axis=-1)                 #(Nc,Nt)
       # Save ICA Mixing matrix, its normalized version and the ICA maps
from scipy import signal
from scipy.fftpack import fft, fftfreq, fftshift
from sklearn.decomposition import PCA, FastICA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
# Change these variables based on the location of your cascade classifier
PATH_TO_HAAR_CASCADES = "image_processing/"
face_cascade = cv2.CascadeClassifier(
    PATH_TO_HAAR_CASCADES +
    'haarcascade_frontalface_default.xml')  # Full pathway must be used
firstFrame = None
time = []
R = []
G = []
B = []
pca = FastICA(n_components=3)
cap = cv2.VideoCapture(0)
if cap.isOpened() == False:
    print("Failed to open webcam")
frame_num = 0
plt.ion()
while cap.isOpened():
    ret, frame = cap.read()
    if ret == True:
        frame_num += 1
        if firstFrame is None:
            start = datetime.datetime.now()
            time.append(0)
            # Take first frame and find face in it
            firstFrame = frame
            cv2.imshow("frame", firstFrame)
Example #49
fs2, psa2 = csvReader("D:\\noela\\Documents\\3TI\\TFE\\github\\csv\\csvCleanData\\winkLeft\\winkLeft2Data\\AF3.csv")
fs3, psa3 = csvReader("D:\\noela\\Documents\\3TI\\TFE\\github\\csv\\csvCleanData\\winkLeft\\winkLeft3Data\\AF3.csv")
fs4, psa4 = csvReader("D:\\noela\\Documents\\3TI\\TFE\\github\\csv\\csvCleanData\\winkLeft\\winkLeft4Data\\AF3.csv")
fs6, psa6 = csvReader("D:\\noela\\Documents\\3TI\\TFE\\github\\csv\\csvCleanData\\winkLeft\\winkLeft6Data\\AF3.csv")
fs7, psa7 = csvReader("D:\\noela\\Documents\\3TI\\TFE\\github\\csv\\csvCleanData\\winkLeft\\winkLeft7Data\\AF3.csv") """

X1 = np.c_[np.array(fs1),
           np.array(fs2),
           np.array(fs3),
           np.array(fs4),
           np.array(fs6),
           np.array(fs7)]
#X2 = np.c_[np.array(psa1), np.array(psa2), np.array(psa3), np.array(psa4), np.array(psa6), np.array(psa7)]

# ICA
ica = FastICA(n_components=3)
S_ = np.array(ica.fit_transform(X1))  # Reconstruct signals

fs_1, t1, psa_1 = signal.spectrogram(S_[:, 0], 128, nfft=nfft)
fs_2, t2, psa_2 = signal.spectrogram(S_[:, 1], 128, nfft=nfft)
fs_3, t3, psa_3 = signal.spectrogram(S_[:, 2], 128, nfft=nfft)

X2 = np.c_[np.array(psa_1), np.array(psa_2)]

# PCA
pca = PCA(n_components=3)
H = pca.fit_transform(X1)
""" fs_1, psa_1 = signal.periodogram(H[:,0], 128, nfft=nfft)
fs_2, psa_2 = signal.periodogram(H[:,1], 128, nfft=nfft)
fs_3, psa_3 = signal.periodogram(H[:,2], 128, nfft=nfft)
def main(tree_k=0,bank_k=0,tree_cluster=0,bank_cluster=0, \
      tree_pca=0, bank_pca=0, tree_ica=0, bank_ica=0,tree_rp=0,bank_rp=0,\
      tree_feature=0, bank_feature=0, tree_NN=0, NN_KM=0, NN_EM=0, NN_without_org=0
    ):

    bank_NN = 0

    # PREPROCESS WILT DATA
    data = pd.read_csv('wilt_full.csv')
    data['class'].replace(['n'], 0, inplace=True)
    data['class'].replace(['w'], 1, inplace=True)
    x_data = data.loc[:, data.columns != 'class']
    y_data = data.loc[:, 'class']
    scaler = StandardScaler()
    x_data = scaler.fit_transform(x_data)
    columns = list(data.columns.values)
    random_state = 100
    # Hold out test set for final performance measure
    x_train, x_test, y_train, y_test = train_test_split(
        x_data,
        y_data,
        test_size=0.3,
        random_state=random_state,
        shuffle=True,
        stratify=y_data)

    if tree_k:
        plot_silhouette_test(x_train,
                             'Silhouette score for Diseased Tree dataset')
        plot_sse_test(
            x_data, 'Sum Squared Errors (K-means) for Diseased Tree dataset')
        plot_bic_test(
            x_data,
            'BIC score (Expectation Maximization) for Diseased Tree dataset')

    if tree_cluster:
        # PLOT CLUSTER FOR K-MEANS
        generate_clusters('KM', x_data, y_data, columns,
                          'K-means cluster scatter plots for each attribute',
                          [1, 5], 'class')
        # PLOT CLUSTER FOR EM
        generate_clusters('EM', x_data, y_data, columns,
                          'EM cluster scatter plots for each attribute',
                          [1, 5], 'class')
        # PLOT CLUSTER FOR GROUND TRUTH
        plot_clusters(data, [1, 5], 'class',
                      'Ground truth cluster scatter plots for each attribute')

    if tree_pca:
        print(x_data.shape)
        transformer = PCA(n_components=2)
        x_pca = transformer.fit_transform(x_data)
        eigen_vals = transformer.explained_variance_
        print(x_pca.shape)
        proj = transformer.inverse_transform(x_pca)
        loss = ((x_data - proj)**2).mean()
        print('PCA loss is: ', loss)

        # Sebastian Raschka, Vahid Mirjalili - Python Machine Learning_ Machine Learning and Deep Learning with Python, scikit-learn, and TensorFlow 2
        total_eigen = sum(eigen_vals)
        var_exp = [(i / total_eigen) for i in sorted(eigen_vals, reverse=True)]
        cum_var_exp = np.cumsum(var_exp)
        plt.bar(range(1, 3),
                var_exp,
                align='center',
                label='individual explained variance')
        plt.step(range(1, 3),
                 cum_var_exp,
                 where='mid',
                 label='Cumulative explained variance',
                 color='green')
        plt.xlabel('Principal component index')
        plt.ylabel('Explained variance ratio')
        plt.tight_layout()
        plt.show()
        columns = ['class', 'principal component 1', 'principal component 2']
        generate_clusters(
            'KM',
            x_pca,
            y_data,
            columns,
            'Cluster dist. plots for each PCA component (K-means)', [1, 2],
            'class',
            type='num')
        generate_clusters('EM',
                          x_pca,
                          y_data,
                          columns,
                          'Cluster dist. plots for each PCA component (EM)',
                          [1, 2],
                          'class',
                          type='num')

    if tree_ica:
        kurts = []
        comps = [i for i in range(1, 6)]
        for i in comps:
            transformer = FastICA(n_components=i)
            x_ICA = transformer.fit_transform(x_data)
            kurt = kurtosis(x_ICA).mean()
            print(kurt)
            kurts.append(kurt)

        plt.plot(comps, kurts)
        plt.xlabel('Components')
        plt.ylabel('Kurtosis')
        plt.title('Kurtosis plot for ICA (Tree)')
        plt.xticks(comps)
        plt.show()

        transformer = FastICA(n_components=2)
        x_ICA = transformer.fit_transform(x_data)
        #    mu = np.mean(x_data, axis=0)
        #  print(x_RP.shape)
        #  print(transformer.mixing_)
        #   proj2 = np.linalg.lstsq(x_RP.T, transformer.components_)[0]
        #  proj2 = x_RP.dot(transformer.components_) + mu

        proj = transformer.inverse_transform(x_ICA)
        loss = ((x_data - proj)**2).mean()
        print('ICA loss is: ', loss)
        columns = [
            'class', 'Independent component 1', 'Independent component 2'
        ]
        generate_clusters(
            'KM',
            x_ICA,
            y_data,
            columns,
            'Cluster dist. plots for each ICA component (K-means)', [1, 2],
            'class',
            type='num')
        generate_clusters('EM',
                          x_ICA,
                          y_data,
                          columns,
                          'Cluster dist. plots for each ICA component (EM)',
                          [1, 2],
                          'class',
                          type='num')

    if tree_rp:
        losses, kurts = [], []
        comps = [i for i in range(2, 6)]
        for i in comps:
            transformer = random_projection.GaussianRandomProjection(
                n_components=i)
            mu = np.mean(x_data, axis=0)
            x_RP = transformer.fit_transform(x_data)
            t_matrix = transformer.components_

            proj = np.linalg.lstsq(x_RP.T, t_matrix)[0] + mu
            loss = ((x_data - proj)**2).mean()
            kurt = kurtosis(x_RP).mean()
            kurts.append(kurt)
            losses.append(loss)
        fig = plt.figure(1)
        ax = fig.add_subplot(121)
        ax.plot(comps, kurts)
        ax.set(xlabel='Components',
               ylabel='Kurtosis',
               title='Kurtosis plot for RP (Tree)',
               xticks=comps)

        ax = fig.add_subplot(122)
        ax.plot(comps, losses)
        ax.set(xlabel='Components',
               ylabel='Loss',
               title='Loss plot for RP (Tree)',
               xticks=comps)

        losses, kurts = [], []

        comps = range(1, 11)
        for i in comps:
            transformer = random_projection.GaussianRandomProjection(
                n_components=2)
            mu = np.mean(x_data, axis=0)
            x_RP = transformer.fit_transform(x_data)
            t_matrix = transformer.components_
            proj = np.linalg.lstsq(x_RP.T, t_matrix)[0] + mu
            loss = ((x_data - proj)**2).mean()
            kurt = kurtosis(x_RP).mean()
            kurts.append(kurt)
            losses.append(loss)
        fig = plt.figure(2)
        ax = fig.add_subplot(121)
        ax.plot(comps, kurts)
        ax.set(xlabel='Run index',
               ylabel='Kurtosis',
               title='Kurtosis plot for RP (Tree)',
               xticks=comps)
        ax = fig.add_subplot(122)
        ax.plot(comps, losses)
        ax.set(xlabel='Run index',
               ylabel='Loss',
               title='Loss plot for RP (Tree)',
               xticks=comps)

        plt.show()

        transformer = random_projection.GaussianRandomProjection(
            n_components=2)
        mu = np.mean(x_data, axis=0)
        x_RP = transformer.fit_transform(x_data)
        t_matrix = transformer.components_
        proj = np.linalg.lstsq(x_RP.T, t_matrix)[0] + mu
        loss = ((x_data - proj)**2).mean()
        print('RP loss is: ', loss)
        columns = ['class', 'Random component 1', 'Random component 2']
        generate_clusters(
            'KM',
            x_RP,
            y_data,
            columns,
            'Cluster dist. plots for each Random Projection component (K-means)',
            [1, 2],
            'class',
            type='num')
        generate_clusters(
            'EM',
            x_RP,
            y_data,
            columns,
            'Cluster dist. plots for each Random Projection component (EM)',
            [1, 2],
            'class',
            type='num')

    if tree_feature:
        clf = ExtraTreesClassifier(n_estimators=50)
        clf = clf.fit(x_data, y_data)
        print(clf.feature_importances_)
        model = SelectFromModel(clf,
                                prefit=True,
                                threshold=0.02,
                                max_features=2)  # default is mean threshold
        x_FS = model.transform(x_data)
        feature_counts = x_FS.shape[1]
        print(x_FS.shape)
        columns = [
            'class', 'Feature sel. component 1', 'Feature sel. component 2'
        ]
        generate_clusters(
            'KM',
            x_FS,
            y_data,
            columns,
            'Cluster dist. plots for each Feature Selection component (K-means)',
            [1, 2],
            'class',
            type='num')
        generate_clusters(
            'EM',
            x_FS,
            y_data,
            columns,
            'Cluster dist. plots for each Feature Selection component (EM)',
            [1, 2],
            'class',
            type='num')

    if tree_NN:
        num_comps = 2
        num_clusters = 3
        f1_scores, accuracys, train_times = [], [], []
        clfs = []
        data_sets = [x_data]
        data_sets_km = [x_data]
        data_sets_em = [x_data]
        names = ['Original', 'PCA', 'ICA', 'Rand. Proj.', 'Feature Sel ']

        transformer_PCA = PCA(n_components=num_comps)
        x_PCA = transformer_PCA.fit_transform(x_data)
        data_sets.append(x_PCA)

        clusterer = KMeans(n_clusters=num_clusters,
                           n_init=30,
                           max_iter=300,
                           random_state=100)
        y_prime = clusterer.fit_predict(x_PCA)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_PCA, y_prime), axis=1)
        if NN_without_org: x_new = y_prime
        data_sets_km.append(x_new)

        clusterer = GaussianMixture(n_components=num_clusters)
        y_prime = clusterer.fit_predict(x_PCA)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_PCA, y_prime), axis=1)
        if NN_without_org: x_new = y_prime
        data_sets_em.append(x_new)

        transformer_ICA = FastICA(n_components=num_comps)
        x_ICA = transformer_ICA.fit_transform(x_data)
        data_sets.append(x_ICA)

        clusterer = KMeans(n_clusters=num_clusters,
                           n_init=30,
                           max_iter=300,
                           random_state=100)
        y_prime = clusterer.fit_predict(x_ICA)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_ICA, y_prime), axis=1)
        if NN_without_org: x_new = y_prime
        data_sets_km.append(x_new)

        clusterer = GaussianMixture(n_components=num_clusters)
        y_prime = clusterer.fit_predict(x_ICA)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_ICA, y_prime), axis=1)
        if NN_without_org: x_new = y_prime
        data_sets_em.append(x_new)

        transformer_RP = random_projection.GaussianRandomProjection(
            n_components=num_comps)
        x_RP = transformer_RP.fit_transform(x_data)
        data_sets.append(x_RP)

        clusterer = KMeans(n_clusters=num_clusters,
                           n_init=30,
                           max_iter=300,
                           random_state=100)
        y_prime = clusterer.fit_predict(x_RP)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_RP, y_prime), axis=1)
        if NN_without_org: x_new = y_prime
        data_sets_km.append(x_new)

        clusterer = GaussianMixture(n_components=num_clusters)
        y_prime = clusterer.fit_predict(x_RP)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_RP, y_prime), axis=1)
        if NN_without_org: x_new = y_prime
        data_sets_em.append(x_new)

        clf_FS = ExtraTreesClassifier(n_estimators=50)
        clf_FS = clf_FS.fit(x_data, y_data)
        model = SelectFromModel(clf_FS,
                                prefit=True,
                                threshold=0.02,
                                max_features=2)  # default is mean threshold
        x_FS = model.transform(x_data)
        data_sets.append(x_FS)

        clusterer = KMeans(n_clusters=num_clusters,
                           n_init=30,
                           max_iter=300,
                           random_state=100)
        y_prime = clusterer.fit_predict(x_FS)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_FS, y_prime), axis=1)
        if NN_without_org: x_new = y_prime
        data_sets_km.append(x_new)

        clusterer = GaussianMixture(n_components=num_clusters)
        y_prime = clusterer.fit_predict(x_FS)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_FS, y_prime), axis=1)
        if NN_without_org: x_new = y_prime
        data_sets_em.append(x_new)

        # Experiment with NN on projected data
        if NN_KM:
            print('K-means cluster as a feature ...')
            data_sets = data_sets_km
            print(len(data_sets))
            suffix = ' (KM)'
        elif NN_EM:
            print('EM cluster as a feature ...')
            data_sets = data_sets_em
            print(len(data_sets))
            suffix = ' (EM)'
        else:
            suffix = ''

        for x_data in data_sets:
            print(x_data)
            clf = mlrose.NeuralNetwork(
                hidden_nodes = [6,6], activation = 'relu', \
                algorithm = 'gradient_descent', max_iters = 1000, \
                bias = True, is_classifier = True, learning_rate = 0.0001, \
                early_stopping = True, clip_max = 5, max_attempts = 100, \
                random_state = 30)
            curves, train_score, test_score, train_acc, test_acc, train_time, test_time = \
                return_stratified_kcv_results(clf, x_data, y_data)
            f1_scores.append(test_score)
            accuracys.append(test_acc)
            print(accuracys)
            print(f1_scores)
            train_times.append(train_time)

        df_plot = pd.DataFrame({
            'names': names,
            'CV_F1_Score': f1_scores,
            'CV_accuracy': accuracys
        })
        #  df_plot = pd.wide_to_long(df_plot, i=['CV_F1_Score', 'CV_accuracy'], j='Measures')
        df_plot = pd.melt(df_plot, id_vars=['names'], value_vars=['CV_F1_Score','CV_accuracy'],\
                var_name='Measures', value_name='Score')
        fig = plt.figure(1)
        ax = fig.add_subplot(121)
        sb.barplot(x="names", y="Score", hue="Measures", data=df_plot, ax=ax)
        ax.set(xlabel='dataset',
               ylabel='score',
               title='NN on org. + proj. data' + suffix)
        plt.xticks(rotation=30)
        ax = fig.add_subplot(122)
        ax.bar(names, train_times, align='center')
        ax.set(xlabel='dataset',
               ylabel='Train time (s)',
               title='Train time of NN on org. + proj. data' + suffix)
        fig.tight_layout()
        plt.xticks(rotation=30)
        plt.show()

    # PREPROCESS BANK DATA
    data = pd.read_csv('bank_full.csv', sep=';')
    data.drop(['day', 'month'], axis=1, inplace=True)
    data['y'].replace(['no'], 0, inplace=True)
    data['y'].replace(['yes'], 1, inplace=True)
    # convert data to numeric where possible
    data = data.apply(pd.to_numeric, errors='ignore', downcast='float')
    #  print(data.hist)
    x_data = data.loc[:, data.columns != "y"]
    x_data_org = x_data
    y_data = data.loc[:, "y"]
    numerical_features = x_data.dtypes == 'float32'
    categorical_features = ~numerical_features
    columns = list(data.columns.values)
    random_state = 100
    preprocess = make_column_transformer(
        (OneHotEncoder(), categorical_features),
        (Normalizer(), numerical_features),
        remainder="passthrough")
    x_data = preprocess.fit_transform(x_data)
    # Hold out test set for final performance measure
    x_train, x_test, y_train, y_test = train_test_split(
        x_data,
        y_data,
        test_size=0.3,
        random_state=random_state,
        shuffle=True,
        stratify=y_data)

    if bank_k:
        plot_silhouette_test(x_data,
                             'Silhouette score for Bank Marketing dataset')
        plot_sse_test(
            x_data, 'Sum Squared Errors (K-means) for Bank Marketing dataset')
        plot_bic_test(
            x_data,
            'BIC score (Expectation Maximization) for Bank Marketing dataset')

    if bank_cluster:
        # PLOT CLUSTER FOR K-MEANS
        generate_clusters('KM',
                          x_data,
                          y_data,
                          columns,
                          'K-means cluster scatter plots for each attribute',
                          [2, 7],
                          'y',
                          x_data_org=x_data_org)
        # PLOT CLUSTER FOR EM
        generate_clusters('EM',
                          x_data,
                          y_data,
                          columns,
                          'EM cluster scatter plots for each attribute',
                          [2, 7],
                          'y',
                          x_data_org=x_data_org)
        # PLOT CLUSTER FOR GROUND TRUTH
        plot_clusters(data, [2, 7], 'y',
                      'Ground truth cluster scatter plots for each attribute')

    if bank_pca:
        print(x_data.shape)
        transformer = PCA(n_components=8)
        x_pca = transformer.fit_transform(x_data)
        eigen_vals = transformer.explained_variance_

        proj = transformer.inverse_transform(x_pca)
        loss = ((x_data - proj)**2).mean()
        print('PCA loss is: ', loss)

        total_eigen = sum(eigen_vals)
        var_exp = [(i / total_eigen) for i in sorted(eigen_vals, reverse=True)]
        cum_var_exp = np.cumsum(var_exp)
        plt.bar(range(1, 9),
                var_exp,
                align='center',
                label='individual explained variance')
        plt.step(range(1, 9),
                 cum_var_exp,
                 where='mid',
                 label='Cumulative explained variance',
                 color='green')
        plt.xlabel('Principal component index')
        plt.ylabel('Explained variance ratio')
        plt.tight_layout()
        plt.show()

        # Sebastian Raschka, Vahid Mirjalili - Python Machine Learning_ Machine Learning and Deep Learning with Python, scikit-learn, and TensorFlow 2
        columns = ['class'
                   ] + ['principal component ' + str(i) for i in range(1, 9)]
        generate_clusters(
            'KM',
            x_pca,
            y_data,
            columns,
            'Bank Cluster dist. plots for each PCA component (K-means)',
            [2, 4],
            'class',
            type='num')
        generate_clusters(
            'EM',
            x_pca,
            y_data,
            columns,
            'Bank Cluster dist. plots for each PCA component (EM)', [2, 4],
            'class',
            type='num')

    if bank_ica:
        kurts = []
        comps = [i for i in range(2, 12)]
        for i in comps:
            transformer = FastICA(n_components=i)
            x_ICA = transformer.fit_transform(x_data)
            kurt = kurtosis(x_ICA).mean()
            print(kurt)
            kurts.append(kurt)

        plt.plot(comps, kurts)
        plt.xlabel('Components')
        plt.ylabel('Kurtosis')
        plt.title('Kurtosis plot for ICA (bank)')
        plt.xticks(comps)
        plt.show()

        transformer = FastICA(n_components=8)
        x_ICA = transformer.fit_transform(x_data)

        proj = transformer.inverse_transform(x_ICA)
        loss = ((x_data - proj)**2).mean()
        print('ICA loss is: ', loss)
        columns = ['class'
                   ] + ['independent component ' + str(i) for i in range(1, 9)]
        generate_clusters(
            'KM',
            x_ICA,
            y_data,
            columns,
            'Bank Cluster dist. plots for each ICA component (K-means-bank)',
            [2, 4],
            'class',
            type='num')
        generate_clusters(
            'EM',
            x_ICA,
            y_data,
            columns,
            'Bank Cluster dist. plots for each ICA component (EM-bank)',
            [2, 4],
            'class',
            type='num')

    if bank_rp:
        losses, kurts = [], []
        comps = [i for i in range(1, 12)]
        for i in comps:
            transformer = random_projection.GaussianRandomProjection(
                n_components=i)
            mu = np.mean(x_data, axis=0)
            x_RP = transformer.fit_transform(x_data)
            t_matrix = transformer.components_

            proj = np.linalg.lstsq(x_RP.T, t_matrix)[0] + mu
            loss = ((x_data - proj)**2).mean()
            kurt = kurtosis(x_RP).mean()
            kurts.append(kurt)
            losses.append(loss)
        fig = plt.figure(1)
        ax = fig.add_subplot(121)
        ax.plot(comps, kurts)
        ax.set(xlabel='Components',
               ylabel='Kurtosis',
               title='Kurtosis plot for RP (Bank)',
               xticks=comps)

        ax = fig.add_subplot(122)
        ax.plot(comps, losses)
        ax.set(xlabel='Components',
               ylabel='Loss',
               title='Loss plot for RP (Bank)',
               xticks=comps)

        losses, kurts = [], []

        comps = range(1, 12)
        for i in comps:
            transformer = random_projection.GaussianRandomProjection(
                n_components=8)
            mu = np.mean(x_data, axis=0)
            x_RP = transformer.fit_transform(x_data)
            t_matrix = transformer.components_
            proj = np.linalg.lstsq(x_RP.T, t_matrix)[0] + mu
            loss = ((x_data - proj)**2).mean()
            kurt = kurtosis(x_RP).mean()
            kurts.append(kurt)
            losses.append(loss)
        fig = plt.figure(2)
        ax = fig.add_subplot(121)
        ax.plot(comps, kurts)
        ax.set(xlabel='Run index',
               ylabel='Kurtosis',
               title='Kurtosis plot for RP (Bank)',
               xticks=comps)
        ax = fig.add_subplot(122)
        ax.plot(comps, losses)
        ax.set(xlabel='Run index',
               ylabel='Loss',
               title='Loss plot for RP (Bank)',
               xticks=comps)

        plt.show()

        transformer = random_projection.GaussianRandomProjection(
            n_components=8)
        mu = np.mean(x_data, axis=0)
        x_RP = transformer.fit_transform(x_data)
        t_matrix = transformer.components_
        proj = np.linalg.lstsq(x_RP.T, t_matrix)[0] + mu
        loss = ((x_data - proj)**2).mean()
        print('RP loss is: ', loss)
        columns = ['class'
                   ] + ['Random component ' + str(i) for i in range(1, 9)]
        generate_clusters(
            'KM',
            x_RP,
            y_data,
            columns,
            'Dist. plots for each Random Projection component (K-means)',
            [2, 4],
            'class',
            type='num')
        generate_clusters(
            'EM',
            x_RP,
            y_data,
            columns,
            'Dist. plots for each Random Projection component (EM)', [2, 4],
            'class',
            type='num')

    if bank_feature:
        clf = ExtraTreesClassifier(n_estimators=50)
        clf = clf.fit(x_data, y_data)
        print(clf.feature_importances_)
        model = SelectFromModel(clf,
                                prefit=True,
                                threshold=0.00525,
                                max_features=8)  # default is mean threshold
        x_FS = model.transform(x_data)
        print(x_FS.shape)
        columns = ['class'] + [
            'Feature selection component ' + str(i) for i in range(1, 9)
        ]
        generate_clusters(
            'KM',
            x_FS,
            y_data,
            columns,
            'Cluster dist. plots for each Feature Selection component (K-means)',
            [2, 4],
            'class',
            type='num')
        generate_clusters(
            'EM',
            x_FS,
            y_data,
            columns,
            'Cluster dist. plots for each Feature Selection component (EM)',
            [2, 4],
            'class',
            type='num')

    if bank_NN:
        f1_scores, accuracys, train_times = [], [], []
        clfs = []
        data_sets = [x_data]
        data_sets_km = [x_data]
        data_sets_em = [x_data]
        names = ['Original', 'PCA', 'ICA', 'Rand. Proj.', 'Feature Sel ']

        transformer = PCA(n_components=8)
        x_PCA = transformer.fit_transform(x_data)
        data_sets.append(x_PCA)

        clusterer = KMeans(n_clusters=2,
                           n_init=30,
                           max_iter=300,
                           random_state=100)
        y_prime = clusterer.fit_predict(x_PCA)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_PCA, y_prime), axis=1)
        data_sets_km.append(x_new)

        clusterer = GaussianMixture(n_components=2)
        y_prime = clusterer.fit_predict(x_PCA)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_PCA, y_prime), axis=1)
        data_sets_em.append(x_new)

        transformer = FastICA(n_components=8)
        x_ICA = transformer.fit_transform(x_data)
        data_sets.append(x_ICA)

        clusterer = KMeans(n_clusters=2,
                           n_init=30,
                           max_iter=300,
                           random_state=100)
        y_prime = clusterer.fit_predict(x_ICA)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_ICA, y_prime), axis=1)
        data_sets_km.append(x_new)

        clusterer = GaussianMixture(n_components=2)
        y_prime = clusterer.fit_predict(x_ICA)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_ICA, y_prime), axis=1)
        data_sets_em.append(x_new)

        transformer = random_projection.GaussianRandomProjection(
            n_components=8)
        x_RP = transformer.fit_transform(x_data)
        data_sets.append(x_RP)

        clusterer = KMeans(n_clusters=2,
                           n_init=30,
                           max_iter=300,
                           random_state=100)
        y_prime = clusterer.fit_predict(x_RP)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_RP, y_prime), axis=1)
        data_sets_km.append(x_new)

        clusterer = GaussianMixture(n_components=2)
        y_prime = clusterer.fit_predict(x_RP)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_RP, y_prime), axis=1)
        data_sets_em.append(x_new)

        clf_FS = ExtraTreesClassifier(n_estimators=50)
        clf_FS = clf_FS.fit(x_data, y_data)
        model = SelectFromModel(clf_FS,
                                prefit=True,
                                threshold=0.0002,
                                max_features=8)  # default is mean threshold
        x_FS = model.transform(x_data)
        data_sets.append(x_FS)

        clusterer = KMeans(n_clusters=2,
                           n_init=30,
                           max_iter=300,
                           random_state=100)
        y_prime = clusterer.fit_predict(x_FS)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_FS, y_prime), axis=1)
        data_sets_km.append(x_new)

        clusterer = GaussianMixture(n_components=2)
        y_prime = clusterer.fit_predict(x_FS)
        y_prime = np.array([y_prime]).T
        x_new = np.concatenate((x_FS, y_prime), axis=1)
        data_sets_em.append(x_new)

        # Experiment with NN on projected data
        if NN_KM:
            print('K-means cluster as a feature ...')
            data_sets = data_sets_km
            print(len(data_sets))
            suffix = '-with K-means cluster'
        elif NN_EM:
            print('EM cluster as a feature ...')
            data_sets = data_sets_em
            print(len(data_sets))
            suffix = '-with EM cluster'
        else:
            suffix = ''

        for x_data in data_sets:
            print(x_data.shape)
            clf = mlrose.NeuralNetwork(
                hidden_nodes = [6,6], activation = 'relu', \
                algorithm = 'gradient_descent', max_iters = 1000, \
                bias = True, is_classifier = True, learning_rate = 0.0001, \
                early_stopping = True, clip_max = 5, max_attempts = 100, \
                random_state = 30)
            curves, train_score, test_score, train_acc, test_acc, train_time, test_time = \
                return_stratified_kcv_results(clf, x_data, y_data)
            f1_scores.append(test_score)
            accuracys.append(test_acc)
            print(accuracys)
            print(f1_scores)
            train_times.append(train_time)

        df_plot = pd.DataFrame({
            'names': names,
            'CV_F1_Score': f1_scores,
            'CV_accuracy': accuracys
        })
        #  df_plot = pd.wide_to_long(df_plot, i=['CV_F1_Score', 'CV_accuracy'], j='Measures')
        df_plot = pd.melt(df_plot, id_vars=['names'], value_vars=['CV_F1_Score','CV_accuracy'],\
                var_name='Measures', value_name='Score')
        fig = plt.figure(1)
        ax = fig.add_subplot(121)
        sb.barplot(x="names", y="Score", hue="Measures", data=df_plot, ax=ax)
        ax.set(xlabel='dataset',
               ylabel='score',
               title='NN on original + proj. data' + suffix)
        plt.xticks(rotation=30)
        ax = fig.add_subplot(122)
        ax.bar(names, train_times, align='center')
        ax.set(xlabel='dataset',
               ylabel='Train time (s)',
               title='NN on original + proj. data' + suffix)
        fig.tight_layout()
        plt.xticks(rotation=30)
        plt.show()
Example #51
0
def apply_ICA(proj_data, proj_weights=None):
    ica = FastICA(n_components=2, random_state=RANDOM_SEED)
    norm_data = normalize_columns(proj_data)
    # Copy needed because ICA whitens the input matrix
    result = ica.fit_transform(norm_data.T.copy())
    return result
Example #52
0
#              name="PCA-based",
#              data=data)
#print(79 * '_')

#(2) apply dimension reduction algorithms
from sklearn.decomposition import PCA
from sklearn.decomposition import FastICA

PCA_data = PCA(n_components=5, whiten=False)
temp = PCA_data.fit(data)
#temp1= temp.components_
PCA_data_trans = PCA_data.transform(data)
PCA_data_trans_test = PCA_data.transform(data_test)
#PCA_comp = PCA_data.components_

ICA_data = FastICA(n_components=5)
ICA_data.fit(data)
ICA_data_trans = ICA_data.transform(data)
ICA_data_trans_test = ICA_data.transform(data_test)

from sklearn.random_projection import GaussianRandomProjection

transformer = GaussianRandomProjection(n_components=5, eps=0.1)
RP_data_trans = transformer.fit_transform(data)
RP_data_trans_test = transformer.transform(data_test)  # reuse the projection fitted on the training data

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

transformer = LinearDiscriminantAnalysis(solver="svd", n_components=5)
LDA_data_trans = transformer.fit_transform(data, labels)  # LDA is supervised, so the class labels are required
# LDA_data_temp = LDA_data.fit(X=data,y=labels)
Example #53
0
def doICA(data, w1, w3, tau2, n_comp=10):
    data_r = np.zeros((data.shape[2], data.shape[0] * data.shape[1]))
    for i in range(data.shape[2]):
        data_r[i] = np.nan_to_num(data[:, :, i]).ravel()

    # Standardize
#    data_r = (data_r - np.mean(data_r,axis=0))/np.std(data_r,ddof=1,axis=0)
#    data_r = normalize(data_r,norm='l2',axis=0)

    ica = FastICA(n_components=n_comp, whiten=True)
    ica.fit(data_r)
    comp = np.zeros((data.shape[0], data.shape[1], ica.components_.shape[0]))
    for i in range(ica.components_.shape[0]):
        comp[:, :, i] = ica.components_[i].reshape(data.shape[0],
                                                   data.shape[1])

    # Plot a series of components
    w1grid, w3grid = np.meshgrid(w3, w1)
    fig, axarr = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
    fig.subplots_adjust(hspace=0.3, wspace=0.4)
    for i in range(9):
        ax = axarr.flatten()[i]
        img = comp[:, :, i]
        ax.pcolormesh(w1grid, w3grid, img)
        ax.set_title('Component ' + str(i))
        ax.set_xlim(w1grid.min(), w1grid.max())
        ax.set_ylim(w3grid.min(), w3grid.max())
        plt.setp(ax.get_yticklabels(), visible=True)
        plt.setp(ax.get_xticklabels(), visible=True)
    plt.show()

    #create surface plots
    surf3d(w1, w3, comp[:, :, 0])
    surf3d(w1, w3, comp[:, :, 1])
    surf3d(w1, w3, comp[:, :, 2])

    data_c = ica.transform(data_r)

    # Plot filtered contours
    w1grid, w3grid = np.meshgrid(w3, w1)
    fig, axarr = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
    fig.subplots_adjust(hspace=0.3, wspace=0.4)
    for j in range(9):
        ax = axarr.flatten()[j]
        i = 2 * j
        img = comp[:,:,0]*data_c[i,0] + comp[:,:,1]*data_c[i,1] + \
            comp[:,:,2]*data_c[i,2] + comp[:,:,3]*data_c[i,3] + comp[:,:,4]*data_c[i,4]
        ax.pcolormesh(w1grid, w3grid, img)
        ax.set_title('Time ' + str(tau2[i]))
        ax.set_xlim(w1grid.min(), w1grid.max())
        ax.set_ylim(w3grid.min(), w3grid.max())
        plt.setp(ax.get_yticklabels(), visible=True)
        plt.setp(ax.get_xticklabels(), visible=True)
    plt.show()

    # Plot against time
    plt.figure(figsize=(5, 5))
    plt.scatter(tau2, data_c[:, 0], color='red', label='C0')  # component 0
    plt.scatter(tau2, data_c[:, 1], color='orange', label='C1')
    plt.scatter(tau2, data_c[:, 2], color='green', label='C2')
    plt.scatter(tau2, data_c[:, 3], color='blue', label='C3')
    plt.xlabel('Time (fs)')
    plt.legend(loc='lower right')

    # Plot value of component contribution vs. component for selected times
    plt.figure(figsize=(5, 5))
    comp_num = list(range(10))
    plt.plot(comp_num, (data_c[0]), color='red', label='t=0')  # component 0
    plt.plot(comp_num, (data_c[3]), color='orange', label='t=270')
    plt.plot(comp_num, (data_c[6]), color='green', label='t=540')
    plt.plot(comp_num, (data_c[9]), color='blue', label='t=5000')
    plt.plot(comp_num, (data_c[12]), color='purple', label='t=20000')
    plt.xlim(-0.5, 10.5)
    plt.ylim(-5, 6.6)
    plt.xlabel('Component')
    plt.ylabel('Component contribution')
    plt.legend(loc='upper right')
    plt.show()

    #print(tau2.shape)
    p0 = .1, .1, .1
    popt, pcov = curve_fit(my_exponential,
                           tau2.ravel() * 0.001,
                           data_c[:, 0],
                           p0,
                           maxfev=1000)
    print(popt)
def lca(data):
    lca = FastICA(n_components=150)
    data = lca.fit_transform(data)
    return pd.DataFrame(data)
Example #55
0
def test_fastica_simple(add_noise=False):
    """ Test the FastICA algorithm on very simple data.
    """
    rng = np.random.RandomState(0)
    # scipy.stats uses the global RNG:
    np.random.seed(0)
    n_samples = 1000
    # Generate two sources:
    s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
    s2 = stats.t.rvs(1, size=n_samples)
    s = np.c_[s1, s2].T
    center_and_norm(s)
    s1, s2 = s

    # Mixing angle
    phi = 0.6
    mixing = np.array([[np.cos(phi),  np.sin(phi)],
                       [np.sin(phi), -np.cos(phi)]])
    m = np.dot(mixing, s)

    if add_noise:
        m += 0.1 * rng.randn(2, 1000)

    center_and_norm(m)

    # function as fun arg
    def g_test(x):
        return x ** 3, 3 * x ** 2

    algos = ['parallel', 'deflation']
    nls = ['logcosh', 'exp', 'cube', g_test]
    whitening = [True, False]
    for algo, nl, whiten in itertools.product(algos, nls, whitening):
        if whiten:
            k_, mixing_, s_ = fastica(m.T, fun=nl, algorithm=algo)
            assert_raises(ValueError, fastica, m.T, fun=np.tanh,
                          algorithm=algo)
        else:
            X = PCA(n_components=2, whiten=True).fit_transform(m.T)
            k_, mixing_, s_ = fastica(X, fun=nl, algorithm=algo, whiten=False)
            assert_raises(ValueError, fastica, X, fun=np.tanh,
                          algorithm=algo)
        s_ = s_.T
        # Check that the mixing model described in the docstring holds:
        if whiten:
            assert_almost_equal(s_, np.dot(np.dot(mixing_, k_), m))

        center_and_norm(s_)
        s1_, s2_ = s_
        # Check to see if the sources have been estimated
        # in the wrong order
        if abs(np.dot(s1_, s2)) > abs(np.dot(s1_, s1)):
            s2_, s1_ = s_
        s1_ *= np.sign(np.dot(s1_, s1))
        s2_ *= np.sign(np.dot(s2_, s2))

        # Check that we have estimated the original sources
        if not add_noise:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=2)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=2)
        else:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=1)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=1)

    # Test FastICA class
    ica = FastICA(fun=nl, algorithm=algo, random_state=0)
    ica.fit(m.T)
    ica.get_mixing_matrix()
    assert_true(ica.components_.shape == (2, 2))
    assert_true(ica.sources_.shape == (1000, 2))

    for fn in [np.tanh, "exp(-.5(x^2))"]:
        ica = FastICA(fun=fn, algorithm=algo, random_state=0)
        assert_raises(ValueError, ica.fit, m.T)

    assert_raises(TypeError, FastICA(fun=moves.xrange(10)).fit, m.T)
Example #56
0
 def perform_pca(self):
     self.dm = PCA(n_components=self.n_components)
     self.dm.fit(self.dataset)
     self.components = self.dm.components_
Example #57
0
def main():
    # First, extract the physiological data eta_data and beta_data -------------------------------------
    
    # Change the working directory to the folder containing the data
    os.chdir(DATAPATH)
    # Prepare a DataFrame to hold the data
    data_df = pd.DataFrame([])
    
    for i_sub in range(len(FILENAME_LIST)):
        # Store the data in the DataFrame (this data is already standardized)
        mean_df = pd.read_excel(FILENAME_LIST[i_sub], sheet_name="mean").drop("Statistics", axis=1)
        max_df = pd.read_excel(FILENAME_LIST[i_sub], sheet_name="max").drop("Statistics", axis=1)
        min_df = pd.read_excel(FILENAME_LIST[i_sub], sheet_name="min").drop("Statistics", axis=1)
        std_df = pd.read_excel(FILENAME_LIST[i_sub], sheet_name="std").drop("Statistics", axis=1)
        # Arrange horizontally in the order [mean, max, min, std]
        df = pd.concat([mean_df, max_df.drop("Task", axis=1), min_df.drop("Task", axis=1), std_df.drop("Task", axis=1)], axis=1, sort=False)
        # Stack each subject's results vertically (standardizing along the way)
        data_df = data_df.append(df)
    
    # Drop the task number
    data2_df = data_df.drop(["Task"], axis=1)
    data2_df.columns = COLUMNS48
    
    # Standardize the data column by column (re-standardize the standardized data)
    stan_data = scipy.stats.zscore(data2_df, axis=0)
    # Convert to a DataFrame
    stan_data_df = pd.DataFrame(stan_data, columns=COLUMNS48)
    
    # Extract viscosity and stiffness separately
    eta_data_df = stan_data_df.iloc[:, [0, 6, 12, 18, 24, 30, 36, 42]]
    beta_data_df = stan_data_df.iloc[:, [1, 7, 13, 19, 25, 31, 37, 43]]
    eta_data = eta_data_df.values
    beta_data = beta_data_df.values
    
    # -------------------------------------------------------------------------
    
    # Next, extract the subjective-evaluation data q_stan_data ------------------------------
    os.chdir(DATAPATH2)  # Change the working directory to the folder containing the data
    # Prepare a DataFrame to hold the data
    q_data_df = pd.DataFrame([])
    for i_sub in range(len(FILENAME_LIST2)):
        # Store the data in the DataFrame
        q_df = pd.read_excel(FILENAME_LIST2[i_sub])
        # Stack each subject's results vertically (standardizing along the way)
        q_data_df = q_data_df.append(arrange_data(q_df, i_sub))
    
    # Drop the task number and the stimulus type
    q_data2_df = q_data_df.drop(["No", "Stimulation"], axis=1)
    
    # Standardize the data column by column (re-standardize the standardized data)
    q_stan_data = scipy.stats.zscore(q_data2_df, axis=0)
    # -------------------------------------------------------------------------
    
    # Prepare an ndarray with the stimulus type
    odor = q_data_df["Stimulation"].values.tolist()
    odor = np.reshape(odor, (len(odor), 1))  # stimulus type
    
    # Run PCA on the subjective evaluations ---------------------------------------------
    pca1 = PCA()
    pca1.fit(q_stan_data)
    
    # Transform the dataset into principal components based on the fitted model
    transformed1 = pca1.fit_transform(q_stan_data)
    # Convert to a DataFrame
    transformed1 = pd.DataFrame(transformed1, columns=["PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", ])
    
    # -------------------------------------------------------------------------
    
    # Compute correlation coefficients
    # Build a DataFrame for the calculation
    cor_df = pd.concat([transformed1["PC1"], stan_data_df], axis=1, sort=False)
    # Correlation DataFrame
    cor_df = cor_df.corr()["PC1"]
    
    # Run PCA using only the features whose correlation is at least 0.45 --------------------
    # Select the features whose correlation is at least 0.45
    new_stan_data_df = stan_data_df.drop(cor_df[cor_df<0.45].index, axis=1)
    new_stan_data = new_stan_data_df.values
    
    # Run PCA (the return values are ndarrays)
    un_score, non_score = mypca(q_stan_data, new_stan_data, odor)
    my2Dplot(un_score, non_score)

    # Run ICA
    # Prepare the data
    ica_data = np.vstack((un_score, non_score))
    # Independent component analysis with FastICA
    ica = FastICA()
    ica.fit(ica_data)
    Uica = ica.components_.T
    Aica = ica.transform(ica_data).T
    
    Uica = Uica / np.sqrt((Uica**2).sum(axis=0))
    
    un = Aica[:, 0:len(un_score)]
    non = Aica[:, len(un_score):]
    # Draw the scatter plot
    plt.figure(figsize=(5, 5))
    plt.scatter(un[0], un[1], s=80, c=[0.4, 0.6, 0.9], alpha=0.8, linewidths="1", edgecolors=[0, 0, 0])
    plt.scatter(non[0], non[1], s=80, c=[0.5, 0.5, 0.5], alpha=0.8, linewidths="1", edgecolors=[0, 0, 0])
    plt.title("ICA scatter", fontsize=18)
    
    # Show the plot
    plt.tight_layout()  # Prevent the title from overlapping
    plt.show()
    
    """
Example #58
0
class ICA(object):
    """
    Wrapper for the sklearn package.  Performs fast ICA (Independent Component Analysis).

    ICA has 5 methods:
       - fit(waveforms)
       update class instance with the ICA fit

       - fit_transform(waveforms)
       do what fit() does, but additionally return the projection onto ICA space

       - inverse_transform(A)
       invert the decomposition; return waveforms for an input A, using Z

       - get_params()
       return metadata used for the fits

       - get_basis()
       return the ICA basis vectors (the mixing matrix)
    """
    def __init__(self,
                 num_components=10,
                 catalog_name='unknown',
                 whiten=True,
                 fun='logcosh',
                 fun_args=None,
                 max_iter=600,
                 tol=.00001,
                 w_init=None,
                 random_state=None,
                 algorithm='parallel'):

        self._decomposition = 'Fast ICA'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._whiten = whiten
        self._fun = fun
        self._fun_args = fun_args
        self._max_iter = max_iter
        self._tol = tol
        self._w_init = w_init
        self._random_state = random_state
        self._algorithm = algorithm

        self._ICA = FastICA(n_components=self._num_components,
                            whiten=self._whiten,
                            fun=self._fun,
                            fun_args=self._fun_args,
                            max_iter=self._max_iter,
                            tol=self._tol,
                            w_init=self._w_init,
                            random_state=self._random_state,
                            algorithm=self._algorithm)

    def fit(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._ICA.fit(self._waveforms)

    def fit_transform(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._ICA.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self, A):
        # convert basis back to waveforms using fit
        new_waveforms = self._ICA.inverse_transform(A)
        return new_waveforms

    def get_params(self):
        # TODO know what catalog was used! (include waveform metadata)
        params = self._ICA.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decomposition'] = self._decomposition
        return params

    def get_basis(self):
        """ Return the ICA basis vectors (Z^\dagger)"""
        return self._ICA.get_mixing_matrix()
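# A brief usage sketch for the ICA wrapper above. The waveform shape and argument
# values are illustrative assumptions; also note that get_basis() relies on the
# long-removed FastICA.get_mixing_matrix(), so on current scikit-learn the
# equivalent attribute is FastICA.mixing_.
import numpy as np

rng = np.random.RandomState(0)
waveforms = rng.standard_normal((200, 64))    # 200 waveforms of 64 samples each (assumed layout)

ica = ICA(num_components=10, catalog_name='demo', random_state=0)
A = ica.fit_transform(waveforms)              # projection onto the 10 ICA components, shape (200, 10)
reconstructed = ica.inverse_transform(A)      # back to waveform space, shape (200, 64)
print(ica.get_params()['num_components'])     # -> 10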
Example #59
0
def train_NN_ICA(filename,
                 X_train,
                 X_test,
                 y_train,
                 y_test,
                 debug=False,
                 numFolds=10,
                 njobs=-1,
                 scalar=1,
                 make_graphs=False,
                 pNN={},
                 nolegend=False,
                 random_seed=1,
                 num_dim=4):
    np.random.seed(random_seed)
    algo = 'ICA-' + str(num_dim)

    start = time.time()
    ica = FastICA(n_components=num_dim, random_state=random_seed)
    ica.fit(X_train)
    X_train = ica.transform(X_train)
    X_test = ica.transform(X_test)

    param_grid = [{
        'hidden_layer_sizes': [(512, 512, 512, 512)],
        'activation': ['relu'],  # 'identity',
        'solver': ['adam'],
        'alpha': [0.0001, 0.001, 0.01, 0.1],
        'batch_size': ['auto'],
        'learning_rate_init': [0.001, 0.01],
        'max_iter': [10000],
        'warm_start': [True],
        'early_stopping': [True],
        'random_state': [1]
    }]

    nn_classifier = MLPClassifier()

    grid_search = GridSearchCV(nn_classifier,
                               param_grid,
                               cv=numFolds,
                               scoring='roc_auc_ovr_weighted',
                               return_train_score=True,
                               n_jobs=njobs,
                               verbose=debug)
    grid_search.fit(X_train, y_train)
    cvres = grid_search.cv_results_

    util.save_gridsearch_to_csv(cvres, algo,
                                filename[:-4] + '-' + str(num_dim), scalar,
                                '-kmeans')

    start = time.time()
    nn_classifier.fit(X_train, y_train)
    print('NN Fit Time: ', time.time() - start)

    start = time.time()
    y_prob = nn_classifier.predict_proba(X_train)
    train_score = roc_auc_score(y_train,
                                y_prob,
                                multi_class="ovr",
                                average="weighted")
    print('NN Train Score Time: ', train_score, time.time() - start)

    start = time.time()
    y_prob = nn_classifier.predict_proba(X_test)
    test_score = roc_auc_score(y_test,
                               y_prob,
                               multi_class="ovr",
                               average="weighted")
    print('NN Test Score Time: ', test_score, time.time() - start)

    test_class = MLPClassifier()
    test_class.set_params(**pNN)

    if make_graphs:
        # computer Model Complexity/Validation curves
        util.plot_learning_curve(nn_classifier,
                                 algo,
                                 filename[:-4],
                                 X_train,
                                 y_train,
                                 ylim=(0.0, 1.05),
                                 cv=10,
                                 n_jobs=njobs,
                                 debug=debug)

        # util.compute_vc(algo, 'alpha',
        #               [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 500,
        #                1000, 5000, 10000, 100000, 1000000], X_train, y_train, X_test, y_test, nn_classifier,
        #               filename[:-4], test_class, pNN, log=True, njobs=njobs, debug=debug)

    return time.time() - start, round(train_score, 4), round(test_score, 4)
def ICEBEEM_wrapper(X,
                    Y,
                    ebm_hidden_size,
                    n_layers_ebm,
                    n_layers_flow,
                    lr_flow,
                    lr_ebm,
                    seed,
                    ckpt_file='icebeem.pt',
                    test=False):
    np.random.seed(seed)
    torch.manual_seed(seed)
    data_dim = X.shape[1]

    model_ebm = MLP_general(input_size=data_dim,
                            hidden_size=[ebm_hidden_size] * n_layers_ebm,
                            n_layers=n_layers_ebm,
                            output_size=data_dim,
                            use_bn=True,
                            activation_function=F.leaky_relu)

    prior = TransformedDistribution(
        Uniform(torch.zeros(data_dim), torch.ones(data_dim)),
        SigmoidTransform().inv)
    nfs_flow = NSF_AR
    flows = [
        nfs_flow(dim=data_dim, K=8, B=3, hidden_dim=16)
        for _ in range(n_layers_flow)
    ]
    convs = [Invertible1x1Conv(dim=data_dim) for _ in flows]
    norms = [ActNorm(dim=data_dim) for _ in flows]
    flows = list(itertools.chain(*zip(norms, convs, flows)))
    # construct the model
    model_flow = NormalizingFlowModel(prior, flows)

    pretrain_flow = True
    augment_ebm = True

    # instantiate ebmFCE object
    fce_ = ConditionalFCE(data=X.astype(np.float32),
                          segments=Y.astype(np.float32),
                          energy_MLP=model_ebm,
                          flow_model=model_flow,
                          verbose=False)

    init_ckpt_file = os.path.splitext(ckpt_file)[0] + '_0' + os.path.splitext(
        ckpt_file)[1]
    if not test:
        if pretrain_flow:
            # print('pretraining flow model..')
            fce_.pretrain_flow_model(epochs=1, lr=1e-4)
            # print('pretraining done.')

        # first we pretrain the final layer of EBM model (this is g(y) as it depends on segments)
        fce_.train_ebm_fce(epochs=15,
                           augment=augment_ebm,
                           finalLayerOnly=True,
                           cutoff=.5)

        # then train full EBM via NCE with flow contrastive noise:
        fce_.train_ebm_fce(epochs=50,
                           augment=augment_ebm,
                           cutoff=.5,
                           useVAT=False)

        torch.save(
            {
                'ebm_mlp': fce_.energy_MLP.state_dict(),
                'ebm_finalLayer': fce_.ebm_finalLayer,
                'flow': fce_.flow_model.state_dict()
            }, init_ckpt_file)
    else:
        state = torch.load(init_ckpt_file, map_location=fce_.device)
        fce_.energy_MLP.load_state_dict(state['ebm_mlp'])
        fce_.ebm_finalLayer = state['ebm_finalLayer']
        fce_.flow_model.load_state_dict(state['flow'])

    # evaluate recovery of latents
    recov = fce_.unmixSamples(X, modelChoice='ebm')
    source_est_ica = FastICA().fit_transform((recov))
    recov_sources = [source_est_ica]

    # iterate between updating noise and tuning the EBM
    eps = .025
    for iter_ in range(3):
        mid_ckpt_file = os.path.splitext(ckpt_file)[0] + '_' + str(
            iter_ + 1) + os.path.splitext(ckpt_file)[1]
        if not test:
            # update flow model:
            fce_.train_flow_fce(epochs=5,
                                objConstant=-1.,
                                cutoff=.5 - eps,
                                lr=lr_flow)
            # update energy based model:
            fce_.train_ebm_fce(epochs=50,
                               augment=augment_ebm,
                               cutoff=.5 + eps,
                               lr=lr_ebm,
                               useVAT=False)

            torch.save(
                {
                    'ebm_mlp': fce_.energy_MLP.state_dict(),
                    'ebm_finalLayer': fce_.ebm_finalLayer,
                    'flow': fce_.flow_model.state_dict()
                }, mid_ckpt_file)
        else:
            state = torch.load(mid_ckpt_file, map_location=fce_.device)
            fce_.energy_MLP.load_state_dict(state['ebm_mlp'])
            fce_.ebm_finalLayer = state['ebm_finalLayer']
            fce_.flow_model.load_state_dict(state['flow'])

        # evaluate recovery of latents
        recov = fce_.unmixSamples(X, modelChoice='ebm')
        source_est_ica = FastICA().fit_transform((recov))
        recov_sources.append(source_est_ica)

    return recov_sources