Example #1
from sklearn.feature_selection import RFE
from sklearn.svm import SVC


def recursive_feature_elimination(input_data,
                                  feature_names,
                                  estimator=SVC(kernel='linear'),
                                  n_features_to_select=None,
                                  step=0.1):
    """
    Recursively eliminates features from x_train and x_test using
    scikit-learn's RFE, see documentation:
    http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFE.html
    If feature_names is given, the names of the eliminated features are
    removed from it as well.

    Args:
        input_data (tuple):                   x_train, y_train, x_test, y_test
        feature_names (list):                 The names of all features before
                                              feature selection or None.
        estimator (object):                   Passed to RFE, see documentation
        n_features_to_select (int or None):   Passed to RFE, see documentation
        step (int or float):                  Passed to RFE, see documentation

    Returns:
        tuple: (x_train, y_train, x_test, y_test), feature_names, input_args
    """
    x_train, y_train, x_test, y_test = input_data

    dims = len(x_train.shape)
    if dims == 3:
        x_train = flatten(x_train)
        x_test = flatten(x_test)
    # n_features_to_select and step are keyword-only in current scikit-learn.
    feature_selector = RFE(estimator,
                           n_features_to_select=n_features_to_select,
                           step=step)
    x_train = feature_selector.fit_transform(x_train, y_train)
    x_test = feature_selector.transform(x_test)
    if dims == 3:
        x_train = make3D(x_train)
        x_test = make3D(x_test)

    output_data = (x_train, y_train, x_test, y_test)

    if feature_names is not None:
        mask = feature_selector.get_support()
        feature_names = feature_names[mask]

    args = {
        'estimator': estimator,
        'n_features_to_select': n_features_to_select,
        'step': step
    }

    return output_data, feature_names, args
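
A minimal usage sketch with synthetic 2-D data (so the flatten/make3D branch is skipped); the feature names are passed as a NumPy array so that the boolean-mask indexing works:

import numpy as np

rng = np.random.default_rng(0)
x_train, x_test = rng.normal(size=(80, 20)), rng.normal(size=(20, 20))
y_train, y_test = rng.integers(0, 2, 80), rng.integers(0, 2, 20)
names = np.array(['f{}'.format(i) for i in range(20)])

data, names, args = recursive_feature_elimination(
    (x_train, y_train, x_test, y_test), names, n_features_to_select=5)
print(data[0].shape, names)  # (80, 5) and the five surviving feature names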
Example #2
import numpy as np
from tensorflow.keras.layers import Conv1D, Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical


def neural_network(input_data, feature_names=None, validate=True):
    """
    Constructs, compiles, trains and tests/makes predictions with a neural
    network.

    Args:
        input_data (tuple):     x_train, y_train, x_test, y_test
        feature_names (list):   Ignored, here for compatibility.
        validate (bool):        If True, an accuracy is returned, if False a
                                list of predictions for x_test is returned.

    Returns:
        float: model accuracy
        or
        list: predicted classifications for x_test.
    """
    # Not used; returned as None so callers get a uniform signature.
    feature_names = None

    x_train, y_train, x_test, y_test = input_data

    all_labels = np.concatenate((y_train, y_test))
    unique_labels = np.unique(all_labels)
    num_classes = unique_labels.shape[0]

    y_train = to_categorical(y_train, num_classes=num_classes)
    if validate:
        y_test = to_categorical(y_test, num_classes=num_classes)

    if len(x_train.shape) == 2:
        x_train = make3D(x_train)
        x_test = make3D(x_test)
    model = Sequential()
    model.add(
        Conv1D(filters=10,
               kernel_size=3,
               activation='relu',
               input_shape=(x_train.shape[1], 1)))
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    # A softmax output trained on one-hot labels needs categorical, not
    # binary, cross-entropy.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=50, batch_size=10, verbose=0)
    if validate:
        evaluation = model.evaluate(x_test, y_test, batch_size=1, verbose=0)
        output = evaluation[1]
    else:
        output = model.predict(x_test)
    return (output, feature_names)
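
A minimal usage sketch with synthetic data, assuming the project's make3D helper is importable (a plausible version is sketched after Example #7):

import numpy as np

rng = np.random.default_rng(0)
x_train, x_test = rng.normal(size=(100, 30)), rng.normal(size=(25, 30))
y_train, y_test = rng.integers(0, 2, 100), rng.integers(0, 2, 25)

accuracy, _ = neural_network((x_train, y_train, x_test, y_test))
print('accuracy: {:.2f}'.format(accuracy))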
Example #3
from sklearn.feature_selection import SelectPercentile, chi2, f_classif


def select_percentile(input_data,
                      feature_names,
                      score_func=chi2,
                      percentile=5):
    """
    Selects the best percentile of features in x_train, scored with
    score_func via scikit-learn's SelectPercentile, and removes the rest
    from x_train and x_test. If feature_names is given, the names of the
    removed features are dropped from it as well.

    Args:
        input_data (tuple):     x_train, y_train, x_test, y_test
        feature_names (list):   The names of all features before selection or
                                None.
        score_func (function):  The score function to be passed to SelectPercentile
        percentile (int):       Percentile of features to keep.

    Returns:
        tuple: (x_train, y_train, x_test, y_test), feature_names, input_args
    """
    if score_func == f_classif:
        input_data, feature_names, _ = remove_constant(input_data,
                                                       feature_names)

    x_train, y_train, x_test, y_test = input_data

    dims = len(x_train.shape)
    if dims == 3:
        x_train = flatten(x_train)
        x_test = flatten(x_test)
    feature_selector = SelectPercentile(score_func=score_func,
                                        percentile=percentile)
    x_train = feature_selector.fit_transform(x_train, y_train)
    x_test = feature_selector.transform(x_test)
    if dims == 3:
        x_train = make3D(x_train)
        x_test = make3D(x_test)

    output_data = (x_train, y_train, x_test, y_test)

    if feature_names is not None:
        mask = feature_selector.get_support()
        feature_names = feature_names[mask]

    return output_data, feature_names, {
        'score_func': score_func,
        'percentile': percentile
    }
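
A minimal usage sketch; the default chi2 score function requires non-negative feature values, so the synthetic data is drawn from a uniform distribution:

import numpy as np

rng = np.random.default_rng(0)
x_train = rng.uniform(0, 1, size=(80, 100))  # chi2 needs non-negative values
x_test = rng.uniform(0, 1, size=(20, 100))
y_train, y_test = rng.integers(0, 2, 80), rng.integers(0, 2, 20)

data, _, args = select_percentile((x_train, y_train, x_test, y_test),
                                  feature_names=None, percentile=10)
print(data[0].shape)  # (80, 10): the top 10% of 100 features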
Example #4
import logging

from sklearn.feature_selection import SelectFdr, f_classif


def select_fdr(input_data,
               feature_names=None,
               score_func=f_classif,
               alpha=0.05):
    """
    Selects the features in x_train that pass scikit-learn's SelectFdr
    false-discovery-rate test at level alpha and removes the rest from
    x_train and x_test. If fewer than two features survive, alpha is raised
    in increments of its initial value until enough features remain. If
    feature_names is given, the names of the removed features are dropped
    from it as well.

    Args:
        input_data (tuple):     x_train, y_train, x_test, y_test
        feature_names (list):   The names of all features before selection or
                                None.
        score_func (function):  The score function to be passed to SelectFdr
        alpha (float):          Passed to SelectFdr, see documentation

    Returns:
        tuple: (x_train, y_train, x_test, y_test), feature_names, final_args
    """
    if score_func == f_classif:
        input_data, feature_names, _ = remove_constant(input_data,
                                                       feature_names)

    x_train, y_train, x_test, y_test = input_data

    dims = len(x_train.shape)
    if dims == 3:
        x_train = flatten(x_train)
        x_test = flatten(x_test)

    done = False
    increment = alpha
    while not done:
        feature_selector = SelectFdr(score_func=score_func, alpha=alpha)
        temp_x_train = feature_selector.fit_transform(x_train, y_train)
        temp_x_test = feature_selector.transform(x_test)
        if temp_x_train.shape[1] > 1 and temp_x_test.shape[1] > 1:
            done = True
            x_train = temp_x_train
            x_test = temp_x_test
        else:
            msg = 'Feature selection was too aggressive, '
            msg += 'increasing alpha from {} to {}'.format(
                alpha, alpha + increment)
            alpha += increment
            logging.warning(msg)

    if dims == 3:
        x_train = make3D(x_train)
        x_test = make3D(x_test)

    output_data = (x_train, y_train, x_test, y_test)
    if feature_names is not None:
        mask = feature_selector.get_support()
        feature_names = feature_names[mask]

    logging.info('Selected {} features'.format(x_train.shape[1]))

    final_args = {'score_func': score_func, 'alpha': alpha}

    return output_data, feature_names, final_args
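
A minimal usage sketch, assuming the project's remove_constant helper (invoked when score_func is f_classif) is importable; a few columns are shifted by the label so that some features actually pass the FDR test:

import numpy as np

rng = np.random.default_rng(0)
x_train, x_test = rng.normal(size=(80, 50)), rng.normal(size=(20, 50))
y_train, y_test = rng.integers(0, 2, 80), rng.integers(0, 2, 20)
x_train[:, :5] += 2.0 * y_train[:, None]  # make five features informative

data, _, args = select_fdr((x_train, y_train, x_test, y_test))
print(data[0].shape[1], args['alpha'])  # surviving features and final alpha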
Example #5
from sklearn.feature_selection import RFECV
from sklearn.svm import SVC


def recursive_feature_elimination_cv(input_data,
                                     feature_names,
                                     step=0.1,
                                     cv=3,
                                     estimator=SVC(kernel='linear')):
    """
    Recursively eliminates features from x_train and x_test with cross
    validation, using scikit-learn's RFECV, see documentation:
    http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFECV.html
    If feature_names is given, the names of the eliminated features are
    removed from it as well.

    Args:
        input_data (tuple):     x_train, y_train, x_test, y_test
        feature_names (list):   The names of all features before feature
                                selection or None.
        estimator (object):     Passed to RFECV, see documentation
        step (int or float):    Passed to RFECV, see documentation
        cv (int):               Passed to RFECV, see documentation

    Returns:
        tuple: (x_train, y_train, x_test, y_test), feature_names, input_args
    """
    x_train, y_train, x_test, y_test = input_data

    dims = len(x_train.shape)
    if dims == 3:
        x_train = flatten(x_train)
        x_test = flatten(x_test)
    # step and cv are keyword-only in current scikit-learn.
    feature_selector = RFECV(estimator, step=step, cv=cv)
    x_train = feature_selector.fit_transform(x_train, y_train)
    x_test = feature_selector.transform(x_test)
    if dims == 3:
        x_train = make3D(x_train)
        x_test = make3D(x_test)

    output_data = (x_train, y_train, x_test, y_test)

    if feature_names is not None:
        mask = feature_selector.get_support()
        feature_names = feature_names[mask]

    args = {'step': step, 'cv': cv, 'estimator': estimator}

    return output_data, feature_names, args
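
A minimal usage sketch; unlike plain RFE, RFECV picks the number of retained features itself via 3-fold cross-validation:

import numpy as np

rng = np.random.default_rng(0)
x_train, x_test = rng.normal(size=(60, 15)), rng.normal(size=(15, 15))
y_train, y_test = rng.integers(0, 2, 60), rng.integers(0, 2, 15)

data, _, args = recursive_feature_elimination_cv(
    (x_train, y_train, x_test, y_test), feature_names=None)
print(data[0].shape)  # the retained feature count is chosen by CV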
Example #6
from sklearn.feature_selection import VarianceThreshold


def variance_threshold(input_data, feature_names, threshold=0.16):
    """
    Removes all features from x_train and x_test whose variance in x_train
    is less than threshold, using scikit-learn's VarianceThreshold. If
    feature_names is given, the names of the removed features are dropped
    from it as well.

    Args:
        input_data (tuple):     x_train, y_train, x_test, y_test
        feature_names (list):   The names of all features before selection or
                                None.
        threshold (float):      Lower limit of variance for a feature to be kept

    Returns:
        tuple: (x_train, y_train, x_test, y_test), feature_names, input_args
    """
    x_train, y_train, x_test, y_test = input_data

    dims = len(x_train.shape)
    if dims == 3:
        x_train = flatten(x_train)
        x_test = flatten(x_test)
    feature_selector = VarianceThreshold(threshold=threshold)
    x_train = feature_selector.fit_transform(x_train)
    x_test = feature_selector.transform(x_test)
    if dims == 3:
        x_train = make3D(x_train)
        x_test = make3D(x_test)

    output_data = (x_train, y_train, x_test, y_test)

    if feature_names is not None:
        mask = feature_selector.get_support()
        feature_names = feature_names[mask]

    return output_data, feature_names, {'threshold': threshold}
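
A minimal usage sketch; a constant column has zero variance and is the first to go:

import numpy as np

rng = np.random.default_rng(0)
x_train = rng.normal(size=(50, 10))
x_train[:, 0] = 1.0  # constant feature: variance 0, below any threshold
x_test = rng.normal(size=(10, 10))
y_train, y_test = rng.integers(0, 2, 50), rng.integers(0, 2, 10)

data, _, _ = variance_threshold((x_train, y_train, x_test, y_test), None)
print(data[0].shape)  # (50, 9): the constant column is gone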
Example #7
def setUp(self):
    # A 3-D array of shape (3, 3, 1) and its flattened 2-D counterpart.
    self.input = np.array([[[1], [2], [3]],
                           [[4], [5], [6]],
                           [[7], [8], [9]]])
    self.output = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    self.flat = flatten(self.input)    # expected to equal self.output
    self.threeD = make3D(self.output)  # expected to equal self.input
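
The flatten and make3D helpers used throughout these examples are project code that this page does not show. From the fixture above (a (3, 3, 1) array maps to a (3, 3) array and back), a plausible minimal implementation would be:

def flatten(x):
    # Collapse (samples, features, 1) to (samples, features).
    return x.reshape(x.shape[0], -1)


def make3D(x):
    # Add a trailing channel axis: (samples, features) -> (samples, features, 1),
    # the input shape Conv1D expects.
    return x.reshape(x.shape[0], x.shape[1], 1)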