# NOTE: imports reconstructed from usage in this fragment; the selector classes
# (SparseSelector, MaxVolSelector, QuickShiftSelector, DensitySelector, OnionSelector,
# DecisionSurfaceSelector, CompositeSelector, AllSelector) and serialize_dict are
# assumed to be defined in, or imported by, the surrounding eden module, which is
# not shown here.
import random
from copy import deepcopy

from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import pairwise_kernels


# Method excerpted from a selector-composition class; the enclosing class definition
# is not part of this fragment. It re-samples the hyperparameters of the instance:
# which selectors to combine, the kernel parameters and the neighborhood sizes.
def randomize(self, data_matrix, amount=.5):
    random.seed(self.random_state)
    inclusion_threshold = random.uniform(amount, 1)
    selectors = []
    if random.random() > inclusion_threshold:
        # int(1e9): randint requires integer bounds on recent Python versions
        selectors.append(SparseSelector(random_state=random.randint(1, int(1e9))))
    if random.random() > inclusion_threshold:
        selectors.append(MaxVolSelector(random_state=random.randint(1, int(1e9))))
    if random.random() > inclusion_threshold:
        selectors.append(QuickShiftSelector(random_state=random.randint(1, int(1e9))))
    if random.random() > inclusion_threshold:
        selectors.append(DensitySelector(random_state=random.randint(1, int(1e9))))
    if random.random() > inclusion_threshold:
        selectors.append(OnionSelector(random_state=random.randint(1, int(1e9))))
    # Fall back to a default pair of selectors if none was sampled.
    if not selectors:
        selectors.append(DensitySelector(random_state=random.randint(1, int(1e9))))
        selectors.append(SparseSelector(random_state=random.randint(1, int(1e9))))
    selector = CompositeSelector(selectors=selectors)
    selector.randomize(data_matrix, amount=amount)
    self.selectors = deepcopy(selector.selectors)
    self.metric = 'rbf'
    self.kwds = {'gamma': random.choice([10 ** x for x in range(-3, 3)])}
    if random.random() > inclusion_threshold:
        self.n_nearest_neighbors = random.randint(3, 20)
    else:
        self.n_nearest_neighbors = None
    self.n_nearest_neighbor_links = random.randint(1, 5)
    self.random_state = self.random_state ^ random.randint(1, int(1e9))
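
# Illustration (not part of the library): the randomize methods in this module share one
# sampling pattern -- seed the RNG, draw an inclusion threshold uniformly from [amount, 1],
# then switch each candidate component on only when a fresh uniform draw exceeds that
# threshold. The self-contained toy below shows how the expected number of included
# components varies with `amount`; the candidates are anonymous placeholders, not eden
# selectors.
def _sketch_inclusion_sampling(amount=.5, n_candidates=5, n_trials=10000, seed=1):
    import random as _random
    rng = _random.Random(seed)
    total = 0
    for _ in range(n_trials):
        inclusion_threshold = rng.uniform(amount, 1)
        total += sum(1 for _ in range(n_candidates)
                     if rng.random() > inclusion_threshold)
    # Average number of components switched on; smaller `amount` yields more components,
    # e.g. _sketch_inclusion_sampling(.2) > _sketch_inclusion_sampling(.8).
    return float(total) / n_trials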
def feature_construction(data_matrix_original, selectors):
    """Construct new features as the similarity to the instances picked by `selectors`."""
    from eden.selector import CompositeSelector
    from eden.selector import Projector
    selector = CompositeSelector(selectors)
    projector = Projector(selector, metric='cosine')
    data_matrix = projector.fit_transform(data_matrix_original)
    return data_matrix
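
# Hypothetical usage sketch (not from the library): build a small random data matrix and
# project it onto the instances chosen by two selectors. It assumes SparseSelector and
# DensitySelector live in eden.selector and accept the random_state keyword used above;
# all variable names are illustrative only.
def _sketch_feature_construction():
    import numpy as np
    from eden.selector import SparseSelector, DensitySelector
    data_matrix_original = np.random.RandomState(1).rand(100, 20)
    selectors = [SparseSelector(random_state=1), DensitySelector(random_state=2)]
    data_matrix = feature_construction(data_matrix_original, selectors)
    # One new feature per selected instance: the cosine similarity to that instance.
    print(data_matrix.shape)
    return data_matrix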
class Projector(object):
    """Constructs features as the instance similarity to a set of instances
    as defined by the selector.

    Parameters
    ----------
    selector : Selector
        The selection strategy used to choose the reference instances.

    scale : bool (default True)
        If True, the returned data matrix is standardized to have zero mean
        and unit variance.

    scaling_factor : float (default 0.8)
        Multiplicative factor applied after normalization. This can be useful
        when data needs to be post-processed by neural networks and one wishes
        to push data into a linear region.

    random_state : int (default 1)
        The seed used for the pseudo-random generator.

    metric : string, or callable
        The metric to use when calculating the kernel between instances in a
        feature array. If metric is a string, it must be one of the metrics in
        pairwise.PAIRWISE_KERNEL_FUNCTIONS. If metric is "precomputed", X is
        assumed to be a kernel matrix. Alternatively, if metric is a callable
        function, it is called on each pair of instances (rows) and the
        resulting value recorded. The callable should take two arrays from X
        as input and return a value indicating the distance between them.

    **kwds : optional keyword parameters
        Any further parameters are passed directly to the kernel function.
    """

    def __init__(self, selector=AllSelector(), scale=True, scaling_factor=0.8,
                 random_state=1, metric='rbf', **kwds):
        self.selector = selector
        self.scale = scale
        self.scaling_factor = scaling_factor
        self.scaler = StandardScaler()
        self.metric = metric
        self.kwds = kwds
        self.random_state = random_state
        # Set on fit; transform checks it to detect use before fitting.
        self.selected_instances = None

    def __repr__(self):
        serial = []
        serial.append('Projector:')
        serial.append('metric: %s' % self.metric)
        if self.kwds:
            serial.append('params:')
            serial.append(serialize_dict(self.kwds))
        serial.append(str(self.selector))
        return '\n'.join(serial)

    def fit(self, data_matrix, target=None):
        """Fit the estimator on the samples.

        Parameters
        ----------
        data_matrix : array-like, shape = (n_samples, n_features)
            Samples.

        target : array-like, optional
            Target values passed on to the selector.

        Returns
        -------
        self
        """
        self.selected_instances = self.selector.fit_transform(data_matrix,
                                                              target=target)
        if self.scale:
            # Fit the scaler on the unscaled projection, then re-enable scaling.
            self.scale = False
            self.scaler.fit(self.transform(data_matrix))
            self.scale = True
        return self

    def fit_transform(self, data_matrix, target=None):
        """Fit the estimator on the samples and transform the features as the
        instance similarity to a set of instances as defined by the selector.

        Parameters
        ----------
        data_matrix : array, shape = (n_samples, n_features)
            Samples.

        target : array-like, optional
            Target values passed on to the selector.

        Returns
        -------
        data_matrix : array, shape = (n_samples, n_features_new)
            Transformed array.
        """
        self.fit(data_matrix, target)
        return self.transform(data_matrix)

    def transform(self, data_matrix):
        """Transform the features as the instance similarity to a set of
        instances as defined by the selector.

        Parameters
        ----------
        data_matrix : array, shape = (n_samples, n_features)
            Samples.

        Returns
        -------
        data_matrix : array, shape = (n_samples, n_features_new)
            Transformed array.
        """
        if self.selected_instances is None:
            raise Exception('Error: attempt to use transform on a non-fit model')
        if self.selected_instances.shape[0] == 0:
            raise Exception('Error: attempt to use transform with 0 selected instances')
        # TODO: the first instance is more important than the others in a selector,
        # so it should receive a weight proportional to its rank, e.g. 1 / rank^p;
        # the selector should also return rank information for each feature.
        # Note: for the composite selector it is important to distinguish the ranks
        # coming from the different selectors.
        data_matrix_out = pairwise_kernels(data_matrix,
                                           Y=self.selected_instances,
                                           metric=self.metric,
                                           **self.kwds)
        if self.scale:
            data_matrix_out = self.scaler.transform(data_matrix_out) * self.scaling_factor
        return data_matrix_out

    def randomize(self, data_matrix, amount=.5):
        random.seed(self.random_state)
        inclusion_threshold = random.uniform(amount, 1)
        selectors = []
        if random.random() > inclusion_threshold:
            selectors.append(QuickShiftSelector(random_state=random.randint(1, int(1e9))))
        if random.random() > inclusion_threshold:
            selectors.append(DecisionSurfaceSelector(random_state=random.randint(1, int(1e9))))
        if random.random() > inclusion_threshold:
            selectors.append(SparseSelector(random_state=random.randint(1, int(1e9))))
        if random.random() > inclusion_threshold:
            selectors.append(MaxVolSelector(random_state=random.randint(1, int(1e9))))
        if random.random() > inclusion_threshold:
            selectors.append(DensitySelector(random_state=random.randint(1, int(1e9))))
        if random.random() > inclusion_threshold:
            selectors.append(OnionSelector(random_state=random.randint(1, int(1e9))))
        # Fall back to a single default selector if none was sampled.
        if not selectors:
            selectors.append(QuickShiftSelector(random_state=random.randint(1, int(1e9))))
        self.selector = CompositeSelector(selectors=selectors)
        self.selector.randomize(data_matrix, amount=amount)
        self.metric = 'rbf'
        self.kwds = {'gamma': random.choice([10 ** x for x in range(-3, 3)])}
        self.random_state = self.random_state ^ random.randint(1, int(1e9))
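
# Sketch of the rank-weighting idea from the TODO in Projector.transform (not implemented
# in the library): if the selector exposed a rank for every selected instance (1 = most
# important), each similarity column could be down-weighted as 1 / rank**p. The `ranks`
# argument below is hypothetical; the current selectors do not return it.
def _sketch_rank_weighting(data_matrix_out, ranks, p=1.0):
    import numpy as np
    weights = 1.0 / np.power(np.asarray(ranks, dtype=float), p)
    # Broadcast one weight per column (i.e. per selected instance).
    return data_matrix_out * weights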