Example #1
0
"""
Created on Thu Jun 11 01:36:52 2015

tester

@author: rhmbp
"""

from ml_lib import lst_import_json, TransformerQ3
from sklearn.externals import joblib
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from ml_config import X_FEATURE_Q3, PATH_PKL_Q3

# 1) Import & load the json file; only the first 20 records are kept.
record = lst_import_json()[0:20]

# 2) Convert the category feature to a regressable (One Hot Encoded) form.
t = TransformerQ3()
lst_dct_bool_cat = t.flatten(record, X_FEATURE_Q3)  # convert X to boolean form

#==============================================================================
# At this point, lst_dct_bool_cat is a list of all category records, with a
# boolean form dict for each record. DictVectorizer can take this form
# lst_dct_bool_cat => [{ Doctor:1, Restaurant:0, .... }]
#==============================================================================

v = DictVectorizer(sparse=False)  # create the feature extractor
X = v.fit_transform(lst_dct_bool_cat)  # One Hot Encode the feature set

# A single-argument print(...) emits the same text under Python 2 and 3:
# the feature names, one space, the four-newline spacer, then a newline.
print('%s %s' % (v.get_feature_names(), '\n\n\n\n'))
Example #2
0
"""
Created on Thu Jun 11 01:36:52 2015
ml_q2_trainer.py

Trains a K Nearest Neighbors Regressor from a json source file.

@author: rhmbp
"""
from ml_lib import lst_import_json, TransformerQ2
from ml_config import X_FEATURES_Q2, Y_FEATURE_Q2, PATH_PKL_Q2
from sklearn import neighbors, grid_search
from sklearn.externals import joblib
import pandas as pd

# 1) Import & load the json file.
record = lst_import_json()


# 2) Create the custom transformer & select your features.
try:
    t = TransformerQ2()
except Exception:
    # Report the failure, then re-raise: every later step needs `t`, and
    # continuing would only fail with a misleading NameError.
    print("transformer failed")
    raise

features = X_FEATURES_Q2  # must match list in ml_q2_worker.py

# transform() is called for its effect on the transformer: the X and y
# attributes are read from `t` right after this call.
t.transform(record, features, Y_FEATURE_Q2)

# A single-argument print(...) emits the same text under Python 2 and 3.
print('%s %s' % (type(t.X), type(t.y)))

Example #3
0
"""
Created on Thu Jun 11 01:36:52 2015

tester

@author: rhmbp
"""

from ml_lib import lst_import_json, TransformerQ3
from sklearn.externals import joblib
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from ml_config import X_FEATURE_Q3, PATH_PKL_Q3

# 1) Import & load the json file; only the first 20 records are kept.
record = lst_import_json()[0:20]

# 2) Convert the category feature to a regressable (One Hot Encoded) form.
t = TransformerQ3()
lst_dct_bool_cat = t.flatten(record, X_FEATURE_Q3)  # convert X to boolean form

#==============================================================================
# At this point, lst_dct_bool_cat is a list of all category records, with a
# boolean form dict for each record. DictVectorizer can take this form
# lst_dct_bool_cat => [{ Doctor:1, Restaurant:0, .... }]
#==============================================================================


v = DictVectorizer(sparse=False)  # create the feature extractor
X = v.fit_transform(lst_dct_bool_cat)  # One Hot Encode the feature set

# A single-argument print(...) emits the same text under Python 2 and 3:
# the feature names, one space, the four-newline spacer, then a newline.
print('%s %s' % (v.get_feature_names(), '\n\n\n\n'))