Example #1
0
 def randomize(self, data_matrix, amount=.5):
     """Randomly re-draw the selectors and kernel settings of this object.

     The module-level ``random`` generator is re-seeded with
     ``self.random_state``, so repeated calls starting from the same state
     produce the same configuration.

     Parameters
     ----------
     data_matrix : object
         Passed unchanged to ``CompositeSelector.randomize``.

     amount : float (default 0.5)
         Lower bound of the interval ``[amount, 1]`` from which the
         inclusion threshold is drawn; it is also forwarded to
         ``CompositeSelector.randomize``.

     Notes
     -----
     Sets ``selectors``, ``metric``, ``kwds``, ``n_nearest_neighbors``,
     ``n_links`` and finally replaces ``random_state`` with a new value.
     """
     # random.randint needs integer bounds: a float such as 1e9 is deprecated
     # since Python 3.10 and raises TypeError from Python 3.12 onwards.
     max_seed = 10 ** 9
     random.seed(self.random_state)
     inclusion_threshold = random.uniform(amount, 1)
     # The order matters: it fixes the order in which random draws are
     # consumed and therefore the reproducibility of the result.
     candidate_classes = (SparseSelector,
                          MaxVolSelector,
                          QuickShiftSelector,
                          DensitySelector,
                          OnionSelector)
     selectors = []
     for selector_class in candidate_classes:
         if random.random() > inclusion_threshold:
             selectors.append(
                 selector_class(random_state=random.randint(1, max_seed)))
     if not selectors:
         # Nothing was drawn: fall back to a fixed pair of selectors.
         selectors.append(
             DensitySelector(random_state=random.randint(1, max_seed)))
         selectors.append(
             SparseSelector(random_state=random.randint(1, max_seed)))
     selector = CompositeSelector(selectors=selectors)
     selector.randomize(data_matrix, amount=amount)
     self.selectors = deepcopy(selector.selectors)
     self.metric = 'rbf'
     # gamma is picked among the powers of ten 10**-3 ... 10**2
     self.kwds = {'gamma': random.choice([10 ** x for x in range(-3, 3)])}
     if random.random() > inclusion_threshold:
         self.n_nearest_neighbors = random.randint(3, 20)
     else:
         self.n_nearest_neighbors = None
     self.n_links = random.randint(1, 5)
     # Move to a new seed so that the next call yields a different draw.
     self.random_state = self.random_state ^ random.randint(1, max_seed)
Example #2
0
 def randomize(self, data_matrix, amount=.5):
     """Randomly re-draw the selectors and kernel settings of this object.

     The module-level ``random`` generator is re-seeded with
     ``self.random_state``, so repeated calls starting from the same state
     produce the same configuration.

     Parameters
     ----------
     data_matrix : object
         Passed unchanged to ``CompositeSelector.randomize``.

     amount : float (default 0.5)
         Lower bound of the interval ``[amount, 1]`` from which the
         inclusion threshold is drawn; it is also forwarded to
         ``CompositeSelector.randomize``.

     Notes
     -----
     Sets ``selectors``, ``metric``, ``kwds``, ``n_nearest_neighbors``,
     ``n_nearest_neighbor_links`` and finally replaces ``random_state``
     with a new value.
     """
     # random.randint needs integer bounds: a float such as 1e9 is deprecated
     # since Python 3.10 and raises TypeError from Python 3.12 onwards.
     max_seed = 10 ** 9
     random.seed(self.random_state)
     inclusion_threshold = random.uniform(amount, 1)
     # The order matters: it fixes the order in which random draws are
     # consumed and therefore the reproducibility of the result.
     candidate_classes = (SparseSelector,
                          MaxVolSelector,
                          QuickShiftSelector,
                          DensitySelector,
                          OnionSelector)
     selectors = []
     for selector_class in candidate_classes:
         if random.random() > inclusion_threshold:
             selectors.append(selector_class(random_state=random.randint(1, max_seed)))
     if not selectors:
         # Nothing was drawn: fall back to a fixed pair of selectors.
         selectors.append(DensitySelector(random_state=random.randint(1, max_seed)))
         selectors.append(SparseSelector(random_state=random.randint(1, max_seed)))
     selector = CompositeSelector(selectors=selectors)
     selector.randomize(data_matrix, amount=amount)
     self.selectors = deepcopy(selector.selectors)
     self.metric = 'rbf'
     # gamma is picked among the powers of ten 10**-3 ... 10**2
     self.kwds = {'gamma': random.choice([10 ** x for x in range(-3, 3)])}
     if random.random() > inclusion_threshold:
         self.n_nearest_neighbors = random.randint(3, 20)
     else:
         self.n_nearest_neighbors = None
     self.n_nearest_neighbor_links = random.randint(1, 5)
     # Move to a new seed so that the next call yields a different draw.
     self.random_state = self.random_state ^ random.randint(1, max_seed)
Example #3
0
def feature_construction(data_matrix_original, selectors):
    """Build a new data matrix from similarities to selected instances.

    The given selectors are wrapped in a ``CompositeSelector``; a
    ``Projector`` using the 'cosine' metric is fit on the input and its
    transformed output is returned.

    Parameters
    ----------
    data_matrix_original : object
        Data handed to ``Projector.fit_transform``.

    selectors : object
        Passed as the single positional argument to ``CompositeSelector``.

    Returns
    -------
    The result of ``Projector.fit_transform`` on ``data_matrix_original``.
    """
    # Imported locally, as in the rest of this function's history, so the
    # eden package is only required when the function is actually called.
    from eden.selector import CompositeSelector, Projector

    composite = CompositeSelector(selectors)
    return Projector(composite, metric='cosine').fit_transform(
        data_matrix_original)
Example #4
0
class Projector(object):
    """Constructs features as the instance similarity to a set of instances as
    defined by the selector.

    Parameters
    ----------
    selector : Selector
        TODO.

    scale : bool (default True)
        If true then the data matrix returned is standardized to have 0 mean and unit variance

    scaling_factor : float (default 0.8)
        Multiplicative factor applied after normalization. This can be useful when data needs to be
        post-processed by neural networks and one wishes to push data in a linear region.

    random_state : int (deafault 1)
        The seed used for the pseudo-random generator.

    metric : string, or callable
        The metric to use when calculating kernel between instances in a
        feature array. If metric is a string, it must be one of the metrics
        in pairwise.PAIRWISE_KERNEL_FUNCTIONS.
        If metric is "precomputed", X is assumed to be a kernel matrix.
        Alternatively, if metric is a callable function, it is called on each
        pair of instances (rows) and the resulting value recorded. The callable
        should take two arrays from X as input and return a value indicating
        the distance between them.

    **kwds : optional keyword parameters
        Any further parameters are passed directly to the kernel function.
    """
    def __init__(self,
                 selector=AllSelector(),
                 scale=True,
                 scaling_factor=0.8,
                 random_state=1,
                 metric='rbf',
                 **kwds):
        self.selector = selector
        self.scale = scale
        self.scaling_factor = scaling_factor
        self.scaler = StandardScaler()
        self.metric = metric
        self.kwds = kwds
        self.random_state = random_state

    def __repr__(self):
        serial = []
        serial.append('Projector:')
        serial.append('metric: %s' % self.metric)
        if self.kwds is None or len(self.kwds) == 0:
            pass
        else:
            serial.append('params:')
            serial.append(serialize_dict(self.kwds))
        serial.append(str(self.selector))
        return '\n'.join(serial)

    def fit(self, data_matrix, target=None):
        """Fit the estimator on the samples.

        Parameters
        ----------
        data_matrix : array-like, shape = (n_samples, n_features)
            Samples.

        Returns
        -------
        self
        """
        self.selected_instances = self.selector.fit_transform(data_matrix,
                                                              target=target)
        if self.scale:
            self.scale = False
            self.scaler.fit(self.transform(data_matrix))
            self.scale = True
        return self

    def fit_transform(self, data_matrix, target=None):
        """Fit the estimator on the samples and transforms features as the instance
        similarity to a set of instances as defined by the selector.

        Parameters
        ----------
        data_matrix : array, shape = (n_samples, n_features)
          Samples.

        target : TODO

        Returns
        -------
        data_matrix : array, shape = (n_samples, n_features_new)
            Transformed array.
        """
        self.fit(data_matrix, target)
        return self.transform(data_matrix)

    def transform(self, data_matrix):
        """Transforms features as the instance similarity to a set of instances as
        defined by the selector.

        Parameters
        ----------
        data_matrix : array, shape = (n_samples, n_features)
          Samples.

        Returns
        -------
        data_matrix : array, shape = (n_samples, n_features_new)
            Transformed array.
        """
        if self.selected_instances is None:
            raise Exception('Error: attempt to use transform on non fit model')
        if self.selected_instances.shape[0] == 0:
            raise Exception(
                'Error: attempt to use transform using 0 selectors')
        data_matrix_out = pairwise_kernels(data_matrix,
                                           Y=self.selected_instances,
                                           metric=self.metric,
                                           **self.kwds)
        if self.scale:
            data_matrix_out = self.scaler.transform(
                data_matrix_out) * self.scaling_factor
        return data_matrix_out

    def randomize(self, data_matrix, amount=.5):
        random.seed(self.random_state)
        inclusion_threshold = random.uniform(amount, 1)
        selectors = []
        if random.random() > inclusion_threshold:
            selectors.append(
                QuickShiftSelector(random_state=random.randint(1, 1e9)))
        if random.random() > inclusion_threshold:
            selectors.append(
                DecisionSurfaceSelector(random_state=random.randint(1, 1e9)))
        if random.random() > inclusion_threshold:
            selectors.append(
                SparseSelector(random_state=random.randint(1, 1e9)))
        if random.random() > inclusion_threshold:
            selectors.append(
                MaxVolSelector(random_state=random.randint(1, 1e9)))
        if random.random() > inclusion_threshold:
            selectors.append(
                DensitySelector(random_state=random.randint(1, 1e9)))
        if random.random() > inclusion_threshold:
            selectors.append(
                OnionSelector(random_state=random.randint(1, 1e9)))
        if not selectors:
            selectors.append(
                QuickShiftSelector(random_state=random.randint(1, 1e9)))
        self.selector = CompositeSelector(selectors=selectors)
        self.selector.randomize(data_matrix, amount=amount)
        self.metric = 'rbf'
        self.kwds = {'gamma': random.choice([10**x for x in range(-3, 3)])}
        self.random_state = self.random_state ^ random.randint(1, 1e9)
Example #5
0
class Projector(object):

    """Constructs features as the instance similarity to a set of instances as
    defined by the selector.

    Parameters
    ----------
    selector : Selector
        TODO.

    scale : bool (default True)
        If true then the data matrix returned is standardized to have 0 mean and unit variance

    scaling_factor : float (default 0.8)
        Multiplicative factor applied after normalization. This can be useful when data needs to be
        post-processed by neural networks and one wishes to push data in a linear region.

    random_state : int (deafault 1)
        The seed used for the pseudo-random generator.

    metric : string, or callable
        The metric to use when calculating kernel between instances in a
        feature array. If metric is a string, it must be one of the metrics
        in pairwise.PAIRWISE_KERNEL_FUNCTIONS.
        If metric is "precomputed", X is assumed to be a kernel matrix.
        Alternatively, if metric is a callable function, it is called on each
        pair of instances (rows) and the resulting value recorded. The callable
        should take two arrays from X as input and return a value indicating
        the distance between them.

    **kwds : optional keyword parameters
        Any further parameters are passed directly to the kernel function.
    """

    def __init__(self, selector=AllSelector(),
                 scale=True,
                 scaling_factor=0.8,
                 random_state=1,
                 metric='rbf', **kwds):
        self.selector = selector
        self.scale = scale
        self.scaling_factor = scaling_factor
        self.scaler = StandardScaler()
        self.metric = metric
        self.kwds = kwds
        self.random_state = random_state

    def __repr__(self):
        serial = []
        serial.append('Projector:')
        serial.append('metric: %s' % self.metric)
        if self.kwds is None or len(self.kwds) == 0:
            pass
        else:
            serial.append('params:')
            serial.append(serialize_dict(self.kwds))
        serial.append(str(self.selector))
        return '\n'.join(serial)

    def fit(self, data_matrix, target=None):
        """Fit the estimator on the samples.

        Parameters
        ----------
        data_matrix : array-like, shape = (n_samples, n_features)
            Samples.

        Returns
        -------
        self
        """
        self.selected_instances = self.selector.fit_transform(data_matrix, target=target)
        if self.scale:
            self.scale = False
            self.scaler.fit(self.transform(data_matrix))
            self.scale = True
        return self

    def fit_transform(self, data_matrix, target=None):
        """Fit the estimator on the samples and transforms features as the instance
        similarity to a set of instances as defined by the selector.

        Parameters
        ----------
        data_matrix : array, shape = (n_samples, n_features)
          Samples.

        target : TODO

        Returns
        -------
        data_matrix : array, shape = (n_samples, n_features_new)
            Transformed array.
        """
        self.fit(data_matrix, target)
        return self.transform(data_matrix)

    def transform(self, data_matrix):
        """Transforms features as the instance similarity to a set of instances as
        defined by the selector.

        Parameters
        ----------
        data_matrix : array, shape = (n_samples, n_features)
          Samples.

        Returns
        -------
        data_matrix : array, shape = (n_samples, n_features_new)
            Transformed array.
        """
        if self.selected_instances is None:
            raise Exception('Error: attempt to use transform on non fit model')
        if self.selected_instances.shape[0] == 0:
            raise Exception('Error: attempt to use transform using 0 selectors')
        # TODO: the first instance is more important than others in a selector, so it should
        # receive a weight proportional to the rank e.g. 1/rank^p
        # the selector should return also a rank information for each feature, note: for the
        # composite selector it is important to distinguish the rank of multiple selectors
        data_matrix_out = pairwise_kernels(data_matrix,
                                           Y=self.selected_instances,
                                           metric=self.metric,
                                           **self.kwds)
        if self.scale:
            data_matrix_out = self.scaler.transform(data_matrix_out) * self.scaling_factor
        return data_matrix_out

    def randomize(self, data_matrix, amount=.5):
        random.seed(self.random_state)
        inclusion_threshold = random.uniform(amount, 1)
        selectors = []
        if random.random() > inclusion_threshold:
            selectors.append(QuickShiftSelector(random_state=random.randint(1, 1e9)))
        if random.random() > inclusion_threshold:
            selectors.append(DecisionSurfaceSelector(random_state=random.randint(1, 1e9)))
        if random.random() > inclusion_threshold:
            selectors.append(SparseSelector(random_state=random.randint(1, 1e9)))
        if random.random() > inclusion_threshold:
            selectors.append(MaxVolSelector(random_state=random.randint(1, 1e9)))
        if random.random() > inclusion_threshold:
            selectors.append(DensitySelector(random_state=random.randint(1, 1e9)))
        if random.random() > inclusion_threshold:
            selectors.append(OnionSelector(random_state=random.randint(1, 1e9)))
        if not selectors:
            selectors.append(QuickShiftSelector(random_state=random.randint(1, 1e9)))
        self.selector = CompositeSelector(selectors=selectors)
        self.selector.randomize(data_matrix, amount=amount)
        self.metric = 'rbf'
        self.kwds = {'gamma': random.choice([10 ** x for x in range(-3, 3)])}
        self.random_state = self.random_state ^ random.randint(1, 1e9)