class VarianceThreshold(FeatureSelectionAlgorithm):
    r"""Implementation of feature selection using variance threshold.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html

    See Also:
        * :class:`niaaml.preprocessing.feature_selection.feature_selection_algorithm.FeatureSelectionAlgorithm`
    """
    Name = 'Variance Threshold'

    def __init__(self, **kwargs):
        r"""Initialize VarianceThreshold feature selection algorithm.
        """
        # NOTE: np.float was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin float is the equivalent, forward-compatible type.
        self._params = dict(
            threshold=ParameterDefinition(MinMax(0, 0.1), float))
        self.__variance_threshold = VarThr()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm.
        """
        self.__variance_threshold.set_params(**kwargs)

    def select_features(self, x, y, **kwargs):
        r"""Perform the feature selection process.

        Arguments:
            x (pandas.core.frame.DataFrame): Array of original features.
            y (pandas.core.series.Series): Expected classifier results.

        Returns:
            numpy.ndarray[bool]: Mask of selected features.
        """
        # y is unused by this selector: variance thresholding is unsupervised.
        self.__variance_threshold.fit(x)
        return self.__variance_threshold.get_support()

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        return FeatureSelectionAlgorithm.to_string(self).format(
            name=self.Name,
            args=self._parameters_to_string(
                self.__variance_threshold.get_params()))
from sklearn.feature_selection import VarianceThreshold

# scikit-learn expects a (samples, features) layout, i.e. time by features.
print('need to transpose to time by features')
feature_vector = spike_counts_batch_sample_batch.T
target_vector = intended_kin_sample.T
print(f'the feature vector has the shape of {feature_vector.shape}')
print(f'the target vector has the shape of {target_vector.shape}\n')

# set up the selector
variance_selector = VarianceThreshold(threshold=0)

# Fit once and transform in the same call; the original fitted twice
# (fit() followed by fit_transform()), which is redundant work.
transformed_feature_vec = variance_selector.fit_transform(feature_vector)

# BUG FIX: estimator.fit() returns the estimator itself (self), not the
# variances, so the old print showed the estimator repr. The per-feature
# empirical variances live in the fitted `variances_` attribute.
feature_variances = variance_selector.variances_
variance_selector_params = variance_selector.get_params()

# get_support() is only valid after fitting, which has happened above.
selected_feature_indx = variance_selector.get_support()

print(f'we know the feature variances to be {feature_variances}')
print(f'we know the parameters are {variance_selector_params}')
print(
    f'after transform, we know the dim of the feature vector {transformed_feature_vec.shape}\n'
)
print(f'we therefore, know the selected indices are {selected_feature_indx}')

# In[ ]:
# Copy the first 1282 features of every instance into the fixed-size
# buffer prepared elsewhere (removes the dead `index` counter and the
# per-element `if j < 1282` guard by slicing instead).
for i, instance in enumerate(feature_list_of_all_instances):
    for j, value in enumerate(instance[:1282]):
        temp_features[i][j] = value

# Standardize features to zero mean and unit variance before selection.
feature_list_of_all_instances = StandardScaler().fit_transform(
    feature_list_of_all_instances)

# Drop zero-variance (constant) features.
variance = VarianceThreshold()
print(variance.get_params())
feature_list_of_all_instances = variance.fit_transform(
    feature_list_of_all_instances)
print("reduced feature number ", len(feature_list_of_all_instances[1]))

# Keep the 100 features with the highest ANOVA F-score w.r.t. the labels.
feature_list_of_all_instances = SelectKBest(f_classif, k=100).fit_transform(
    feature_list_of_all_instances, class_list_of_all_instances)