Python extract_features Examples

Programming Language: Python

Namespace/Package Name: bacteriopop_utils

Method/Function: extract_features

Examples at hotexamples.com: 10

Python extract_features - 10 examples found. These are the top-rated real-world Python examples of bacteriopop_utils.extract_features, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: tests.py Project: dacb/USP-inhibition

 def test_on_animal_df(self):
     """
     Check extract_features on a small animal DataFrame, using fillna mode.

     Only the columns named in column_list are requested; the 'gender' and
     'garbage' columns (the only ones holding NaN) are left out. The test
     asserts both the resulting column names and the numeric values against
     a hand-written numpy array.
     """
     # np.nan is the canonical spelling; the np.NaN alias was removed in
     # NumPy 2.0.
     animal_df = pd.DataFrame({'animal': ['dog', 'cat', 'rat'],
                               'color': ['white', 'brown', 'brown'],
                               'gender': ['F', 'F', np.nan],
                               'weight': [25, 5, 1],
                               'garbage': [0, 1, np.nan],
                               'abundance': [0.5, 0.4, 0.1]})
     extracted = bacteriopop_utils.extract_features(
         dataframe=animal_df,
         column_list=['animal', 'color', 'weight', 'abundance'],
         fillna=True
         )
     # check that the column names match what is expected
     self.assertEqual(extracted.columns.tolist(),
                      ['abundance', 'animal=cat', 'animal=dog',
                       'animal=rat', 'color=brown', 'color=white',
                       'weight'])
     # check that the values are what was expected.
     expected_result = np.array([[0.5, 0., 1., 0., 0., 1., 25.],
                                 [0.4, 1., 0., 0., 1., 0., 5.],
                                 [0.1, 0., 0., 1., 1., 0., 1.]])
     # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
     # same ndarray and is available in old and new pandas alike.
     self.assertEqual(expected_result.tolist(),
                      extracted.values.tolist())

Example #2

Show file

def dbscan_demo():
    """Load the data, extract its features, and run dbscan on the result.

    A progress message is printed after each stage.
    """
    # Single-argument print(...) produces identical output on Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on 3.
    print('starting up')
    df = load_data()
    print('load done')
    df = extract_features(df)
    print('features done')
    # NOTE(review): 0.2 and 10 are passed positionally to dbscan; presumably
    # clustering parameters -- confirm against dbscan's signature.
    dbscan(df, 0.2, 10)

Example #3

Show file

File: dbscan.py Project: JanetMatsen/bacteriopop

def dbscan_demo():
    """Run the DBSCAN demo: load data, extract features, then call dbscan.

    Prints a short status line before loading and after each of the first
    two stages.
    """
    # print(...) with one argument behaves the same on Python 2 and 3; the
    # original print statements only parse on Python 2.
    print('starting up')
    df = load_data()
    print('load done')
    df = extract_features(df)
    print('features done')
    # NOTE(review): meaning of the positional 0.2 and 10 is defined by
    # dbscan, which is not visible here -- verify before changing.
    dbscan(df, 0.2, 10)

Example #4

Show file

File: gmm.py Project: JanetMatsen/bacteriopop

def gmm_demo():
    """Load the data, extract features, and run gmm on all but the first column.

    A progress message is printed after each stage.
    """
    # Single-argument print(...) produces identical output on Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on 3.
    print('starting up')
    df = load_data()
    print('load done')
    df = extract_features(df)
    print('extract done')
    # Every column name except the first is handed to gmm as a feature.
    features_list = list(df.columns.values)[1:]
    print('features done')
    gmm(df, features_list)

Example #5

Show file

File: gmm.py Project: JanetMatsen/bacteriopop

def gmm_demo():
    """Run the GMM demo: load data, extract features, then call gmm.

    The feature list given to gmm is the extracted frame's column names
    with the first one dropped. Status lines are printed along the way.
    """
    # print(...) with one argument behaves the same on Python 2 and 3; the
    # original print statements only parse on Python 2.
    print('starting up')
    df = load_data()
    print('load done')
    df = extract_features(df)
    print('extract done')
    # Skip the first column name; the rest are passed to gmm.
    features_list = list(df.columns.values)[1:]
    print('features done')
    gmm(df, features_list)

Example #6

Show file

File: driver.py Project: gaoy316/bacteriopop

def main():
    """
    Entry point for all code.

    Loads the data, vectorizes the columns named in FEATURES_TO_EXTRACT
    (with fillna enabled and debug disabled), then computes the
    feature/target correlation and a PCA on the vectorized frame.

    Returns:
        A tuple (target_correlation, pca) holding the results of
        calculate_features_target_correlation and pca_bacteria.
    """
    # Single-argument print(...) produces identical output on Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on 3.
    print("starting up")
    df = load_data()
    df_vectorized = extract_features(df, column_list=FEATURES_TO_EXTRACT,
                                     fillna=True, debug=False)
    # Every column of the vectorized frame is offered as a candidate feature.
    target_correlation = calculate_features_target_correlation(
        df_vectorized, df_vectorized.columns.tolist(),
        PREDICTION_TARGET, PCA_METHOD)
    pca = pca_bacteria(df_vectorized, PCA_COMPONENTS)
    return target_correlation, pca

Example #7

Show file

File: dbscan.py Project: JanetMatsen/bacteriopop

import random

import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import DBSCAN
from load_data import load_data
from bacteriopop_utils import extract_features

# Module-level dataset: loaded once at import time and immediately replaced
# by its extracted-feature form.
DF = extract_features(load_data())

def get_random_color(pastel_factor=0.5):
    """Return a list of three random color channel values.

    Each channel is drawn uniformly from [0, 1.0] and then mapped to
    (x + pastel_factor) / (1.0 + pastel_factor).
    """
    # Draw all three raw channels first so the random stream is consumed
    # before any arithmetic on pastel_factor happens.
    channels = []
    for _ in range(3):
        channels.append(random.uniform(0, 1.0))
    scale = 1.0 + pastel_factor
    return [(channel + pastel_factor) / scale for channel in channels]


def color_distance(c1, c2):
    """Return the sum of absolute per-channel differences between c1 and c2.

    Channels are paired with zip, so extra entries in the longer sequence
    are ignored.
    """
    return sum(abs(first - second) for first, second in zip(c1, c2))


def generate_new_color(existing_colors, pastel_factor=0.5):
    max_distance = None
    best_color = None
    for i in range(0, 100):
        color = get_random_color(pastel_factor=pastel_factor)
        if not existing_colors:
            return color
        best_distance = min([color_distance(color, c) for c in
                             existing_colors])
        if not max_distance or best_distance > max_distance:
            max_distance = best_distance

Example #8

Show file

import random

import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import DBSCAN
from load_data import load_data
from bacteriopop_utils import extract_features

# Load the data once when the module is imported and keep only the result of
# running it through extract_features.
DF = extract_features(load_data())


def get_random_color(pastel_factor=0.5):
    """Build a random three-channel color.

    Three values are sampled uniformly from [0, 1.0]; each is returned as
    (value + pastel_factor) / (1.0 + pastel_factor).
    """
    samples = [random.uniform(0, 1.0) for _ in range(3)]
    denominator = 1.0 + pastel_factor

    def soften(value):
        # Shift by pastel_factor and rescale by the shared denominator.
        return (value + pastel_factor) / denominator

    return list(map(soften, samples))


def color_distance(c1, c2):
    """Add up the absolute differences of corresponding channels.

    Pairing is done with zip, so comparison stops at the shorter input.
    """
    total = 0
    for left, right in zip(c1, c2):
        total += abs(left - right)
    return total


def generate_new_color(existing_colors, pastel_factor=0.5):
    max_distance = None
    best_color = None
    for i in range(0, 100):
        color = get_random_color(pastel_factor=pastel_factor)
        if not existing_colors:
            return color
        best_distance = min(
            [color_distance(color, c) for c in existing_colors])
        if not max_distance or best_distance > max_distance:

Example #9

Show file

File: gmm_demo.py Project: gaoy316/bacteriopop

def gmm_demo():
    """Load the data, extract features, and run gmm on all but the first column."""
    features = extract_features(load_data())
    column_names = list(features.columns.values)
    # The first column name is left out of the feature list given to gmm.
    gmm(features, column_names[1:])

Example #10

Show file

def gmm_demo():
    """Run the GMM demo on the extracted features of the loaded data.

    gmm receives the extracted frame together with its column names, minus
    the first one.
    """
    raw_frame = load_data()
    feature_frame = extract_features(raw_frame)
    all_columns = list(feature_frame.columns.values)
    selected = all_columns[1:]  # skip the first column name
    gmm(feature_frame, selected)