print(utils.multiclass.type_of_target(train_y))

# In[260]:

fold_generator = KFold(n_splits=5, shuffle=True, random_state=111)

# In[261]:

## score the models with MSE (scikit-learn exposes it as negative MSE)
use_metrics = ["neg_mean_squared_error"]

# - Linear Model

# In[262]:

lm_scores = cross_validate(LM(),
                           train_X,
                           train_y,
                           cv=fold_generator,
                           scoring=use_metrics)

# - KNN

# In[263]:

knn_scores = cross_validate(KNN(),
                            train_X,
                            train_y,
                            cv=fold_generator,
                            scoring=use_metrics)
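
# - Summarize the CV scores (a sketch, not part of the original notebook)

## cross_validate reports the metric negated, so flip the sign and take the
## square root to compare the two models on mean RMSE across folds
lm_rmse = ((-lm_scores["test_neg_mean_squared_error"]) ** 0.5).mean()
knn_rmse = ((-knn_scores["test_neg_mean_squared_error"]) ** 0.5).mean()
print("LM  CV RMSE:", lm_rmse)
print("KNN CV RMSE:", knn_rmse)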
Example #2
            df=gold_lex, cols=DIRECTIONS[curr_dir]['target'])

        n_in = source_lexicon.shape[1]
        n_out = target_lexicon.shape[1]
        baseline = MLP_Ensemble(embeddings=embeddings)
        my_model = framework.models.Mapping_Model(
            layers=[n_in] + MY_MODEL['hidden_layers'] + [n_out],
            activation=MY_MODEL['activation'],
            dropout_hidden=MY_MODEL['dropout_hidden'],
            train_steps=MY_MODEL['train_steps'],
            batch_size=MY_MODEL['batch_size'],
            optimizer=MY_MODEL['optimizer'],
            source_lexicon=source_lexicon)

        reference_LM = framework.models.SKlearn_Mapping_Model(
            base_model=LM(), source_lexicon=source_lexicon)
        reference_KNN = framework.models.SKlearn_Mapping_Model(
            base_model=KNN(n_neighbors=20), source_lexicon=source_lexicon)

        ev = framework.models.Evaluator(
            models={
                'baseline': baseline,
                'Reference_KNN': reference_KNN,
                'reference_LM': reference_LM,
                'my_model': my_model
            })
        ev.crossvalidate(words=gold_lex.index,
                         labels=target_lexicon,
                         k_splits=KFOLD,
                         outpath='results/{}/{}'.format(
                             curr_dir, setting.name))
Example #3
import keras.backend as K

from main.data import SETTINGS, VAD, BE5, DIRECTIONS

from framework.util import average_subdirs

INFO = '''

Ablation experiment in Section 4.1 (Figure 2).

'''

k_fold = 10

MODELS = {
    # 'knn': KNN(n_neighbors=20),  # only use linear regression since it has no hyperparameters
    'lm': LM()
}

for setting in SETTINGS:
    print(setting.name)
    gold_lex = setting.load_data()

    #this experiment will only be performed for those gold lexicons which
    #also have dominance.
    if 'Dominance' in gold_lex.columns:
        for curr_dir in list(DIRECTIONS):
            print(curr_dir)
            source_lexicon = gold_lex[DIRECTIONS[curr_dir]['source']]
            target_lexicon = gold_lex[DIRECTIONS[curr_dir]['target']]

            for base_model_name, base_model in MODELS.items():
Example #4
train['Age'] = 2015 - pd.DatetimeIndex(train['Open Date']).year
test['Age'] = 2015 - pd.DatetimeIndex(test['Open Date']).year

# Extract the age and log-transform it
X = np.log(train[['Age']].values.reshape((train.shape[0], 1)))
Xt = np.log(test[['Age']].values.reshape((test.shape[0], 1)))
y = train['revenue'].values

# Randomize the order for cross-validation
combined = list(zip(y, X))  # materialize: zip() is a lazy iterator in Python 3
shuffle(combined)
y[:], X[:] = zip(*combined)


# Model setup
clf = LM()

scores = []

# KFold's modern API takes n_splits and yields train/test indices from .split()
ss = KFold(n_splits=3, shuffle=True)
for trainCV, testCV in ss.split(X):
    X_train, X_test, y_train, y_test = X[trainCV], X[testCV], y[trainCV], y[testCV]
    clf.fit(X_train, np.log(y_train))
    y_pred = np.exp(clf.predict(X_test))

    scores.append(mean_squared_error(y_test, y_pred))

# Average RMSE from cross-validation
scores = np.array(scores)
print("CV Score:", np.mean(scores ** 0.5))
Example #5

# %% -----------------------------------------

# Generate the sample of the data.
X, y = gen_data(N=250)

# Plot the synthetic data
D = pd.DataFrame(dict(y=y, x1=X[:, 0], x2=X[:, 1], x3=X[:, 2]))
(ggplot(D.melt(id_vars='y'), aes(x="value", y='y')) + geom_point(alpha=.25) +
 facet_wrap('variable') + theme(figure_size=(7, 3)))

# %% Build Model -----------------------------------------

# Linear model
mod_lm = LM()
mod_lm.fit(X, y)

# Random Forest model
mod_rf = RF()
mod_rf.fit(X, y)

# In-sample, the random forest fits better than the linear model (lower MSE)
print(m.mean_squared_error(y, mod_lm.predict(X)))
print(m.mean_squared_error(y, mod_rf.predict(X)))

# Performance on future data.
test_X, test_y = gen_data(N=250, seed=2000)

print(m.mean_squared_error(test_y, mod_lm.predict(test_X)))
print(m.mean_squared_error(test_y, mod_rf.predict(test_X)))
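
# %% Cross-validated comparison -----------------------------------------

# A sketch (not in the original script): in-sample MSE can flatter a flexible
# model like the random forest, so also compare both models with 5-fold CV on
# the training sample. Assumes LM and RF are the sklearn aliases used above.
from sklearn.model_selection import KFold, cross_val_score

cv = KFold(n_splits=5, shuffle=True, random_state=1)
lm_cv_mse = -cross_val_score(LM(), X, y, cv=cv, scoring="neg_mean_squared_error")
rf_cv_mse = -cross_val_score(RF(), X, y, cv=cv, scoring="neg_mean_squared_error")
print("LM CV MSE:", lm_cv_mse.mean())
print("RF CV MSE:", rf_cv_mse.mean())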