"""
Created on Thu Jun 11 01:36:52 2015

tester

Loads a sample of the json records, flattens the Q3 category feature into
per-record boolean dicts and one-hot encodes them with DictVectorizer.

@author: rhmbp
"""

from ml_lib import lst_import_json, TransformerQ3
from sklearn.externals import joblib
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from ml_config import X_FEATURE_Q3, PATH_PKL_Q3

# 1) Import & load the json file.
# Only the first 20 records are kept for this test run.
record = lst_import_json()[0:20]

# 2) Convert the category feature into a regressable form (One Hot Encoded).
t = TransformerQ3()
lst_dct_bool_cat = t.flatten(record, X_FEATURE_Q3)  # convert X to boolean form

#==============================================================================
# At this point, lst_dct_bool_cat is a list of all category records, with a
# boolean form dict for each record. DictVectorizer can take this form
# lst_dct_bool_cat => [{ Doctor:1, Restaurant:0, .... }]
#==============================================================================
v = DictVectorizer(sparse=False)       # create the feature extractor
X = v.fit_transform(lst_dct_bool_cat)  # One Hot Encode the feature set

# A single pre-formatted argument keeps the output identical to the old
# `print a, b` statement while remaining valid under the print() function.
print("%s %s" % (v.get_feature_names(), '\n\n\n\n'))
"""
Created on Thu Jun 11 01:36:52 2015

ml_q2_trainer.py

Trains a K Nearest Neighbors Regressor from a json source file.

@author: rhmbp
"""

from ml_lib import lst_import_json, TransformerQ2
from ml_config import X_FEATURES_Q2, Y_FEATURE_Q2, PATH_PKL_Q2
from sklearn import neighbors, grid_search
from sklearn.externals import joblib
import pandas as pd

# 1) import & load the json file
record = lst_import_json()

# 2) create the custom transformer & select your features
try:
    t = TransformerQ2()
except Exception:
    # Report the failure, then re-raise: without a transformer the very next
    # step cannot run, and swallowing the error here would only surface later
    # as an unrelated NameError on `t`.
    print("transformer failed")
    raise

features = X_FEATURES_Q2  # must match list in ml_q2_worker.py
t.transform(record, X_FEATURES_Q2, Y_FEATURE_Q2)

# A single pre-formatted argument keeps the output identical to the old
# `print a, b` statement while remaining valid under the print() function.
print("%s %s" % (type(t.X), type(t.y)))