Example #1
0
"""
Examples using rosetta.modeling.eda
"""
import pandas as pd
import numpy as np
import matplotlib.pylab as pl

from rosetta.modeling import eda

N = 1000

# Make a linear income vs. age relationship.
age = pd.Series(100 * np.random.rand(N))
age.name = 'age'
income = 10 * age + 10 * np.random.randn(N)
# Arithmetic on a Series carries the operand's name along, so without an
# explicit rename income would still be called 'age'.
income.name = 'income'

# The relationship E[Y | X=x] is linear
pl.figure(1)
pl.clf()
eda.plot_reducedY_vs_binnedX(age, income)


# Make a sigmoidal P[cancer | X=x] relationship
def sigmoid(x):
    """
    Return the logistic function of x after centering and scaling.

    x is shifted by its mean, divided by its standard deviation and
    multiplied by 5; the logistic function 1 / (1 + exp(-z)) is then applied
    elementwise to the result.

    Parameters
    ----------
    x : object with .mean() and .std() that supports numpy ufuncs
        For example a numpy array or a pandas Series.

    Returns
    -------
    Elementwise values in the interval [0, 1].  If x.std() is zero the
    scaling step divides by zero and the result is not meaningful.
    """
    x_st = 5 * (x - x.mean()) / x.std()
    # exp(x_st) / (1 + exp(x_st)) becomes inf / inf = nan once exp overflows.
    # exp(-log(1 + exp(-x_st))) is the same function and stays finite.
    return np.exp(-np.logaddexp(0, -x_st))

has_cancer = (np.random.rand(N) < sigmoid(age)).astype('int')
# The comparison result inherits the name of the Series it was built from
# ('age'); give the indicator its own name.
has_cancer.name = 'has_cancer'
pl.figure(2)
pl.clf()
eda.plot_reducedY_vs_binnedX(age, has_cancer)
Example #2
0
###############################################################################
# X-Y plotting
###############################################################################
N = 1000

# Build an income series that depends linearly on age.
age = pd.Series(100 * rand(N), name='age')
income = 10 * age + 10 * randn(N)
income.name = 'income'

# E[Y | X=x] is linear in x for this data.
pl.figure(1)
pl.clf()
eda.plot_reducedY_vs_binnedX(age, income)


# Make a sigmoidal P[is_manager | X=x] relationship
def sigmoid(x):
    """
    Return the logistic function of x after centering and scaling.

    x is shifted by its mean, divided by its standard deviation and
    multiplied by 5; the logistic function 1 / (1 + exp(-z)) is then applied
    elementwise to the result.

    Parameters
    ----------
    x : object with .mean() and .std() that supports numpy ufuncs
        For example a numpy array or a pandas Series.

    Returns
    -------
    Elementwise values in the interval [0, 1].  If x.std() is zero the
    scaling step divides by zero and the result is not meaningful.
    """
    x_st = 5 * (x - x.mean()) / x.std()
    # exp(x_st) / (1 + exp(x_st)) becomes inf / inf = nan once exp overflows.
    # exp(-log(1 + exp(-x_st))) is the same function and stays finite.
    return np.exp(-np.logaddexp(0, -x_st))


# Draw a 0/1 indicator whose probability follows sigmoid(age).
is_manager = rand(N) < sigmoid(age)
is_manager = is_manager.astype('int')
is_manager.name = 'is_manager'

pl.figure(2)
pl.clf()
eda.plot_reducedY_vs_binnedX(age, is_manager)

###############################################################################
Example #3
0

###############################################################################
# X-Y plotting
###############################################################################
N = 1000

# Build an income series that depends linearly on age.
age = pd.Series(100 * rand(N), name='age')
income = 10 * age + 10 * randn(N)
income.name = 'income'

# E[Y | X=x] is linear in x for this data.
pl.figure(1)
pl.clf()
eda.plot_reducedY_vs_binnedX(age, income)


# Make a sigmoidal P[is_manager | X=x] relationship
def sigmoid(x):
    """
    Return the logistic function of x after centering and scaling.

    x is shifted by its mean, divided by its standard deviation and
    multiplied by 5; the logistic function 1 / (1 + exp(-z)) is then applied
    elementwise to the result.

    Parameters
    ----------
    x : object with .mean() and .std() that supports numpy ufuncs
        For example a numpy array or a pandas Series.

    Returns
    -------
    Elementwise values in the interval [0, 1].  If x.std() is zero the
    scaling step divides by zero and the result is not meaningful.
    """
    x_st = 5 * (x - x.mean()) / x.std()
    # exp(x_st) / (1 + exp(x_st)) becomes inf / inf = nan once exp overflows.
    # exp(-log(1 + exp(-x_st))) is the same function and stays finite.
    return np.exp(-np.logaddexp(0, -x_st))

# Draw a 0/1 indicator whose probability follows sigmoid(age).
is_manager = rand(N) < sigmoid(age)
is_manager = is_manager.astype('int')
is_manager.name = 'is_manager'

pl.figure(2)
pl.clf()
eda.plot_reducedY_vs_binnedX(age, is_manager)


###############################################################################
# Correlation matrix plotting