# Repeated hold-out evaluation of the what-if engine on the 'TeraSort' logs.
import Build_Model
import Testing
import logs
from sklearn import cross_validation
import numpy as np

# Pull every series out of the log object as a NumPy array so that all of
# them can be split row-wise in a single train_test_split call.
new_model = logs.logs_object('TeraSort')
MapFeature_list = np.array(new_model.get_MapFeature_list())
RedFeature_list = np.array(new_model.get_RedFeature_list())
MapMean_list = np.array(new_model.get_MapMean_list())
RedMean_list = np.array(new_model.get_RedMean_list())
MapDev_list = np.array(new_model.get_MapDev_list())
RedDev_list = np.array(new_model.get_RedDev_list())
Target_list = np.array(new_model.get_target_list())

# NOTE(review): the file's indentation was lost; the whole split/train/test
# sequence is assumed to belong to the loop body -- confirm.
for i in range(10):
    # Seed each repetition with its own index. A constant seed reproduces the
    # very same 70/30 split on every pass, which makes the ten repetitions
    # redundant; seeding with `i` varies the split while keeping the whole
    # run reproducible.
    (MF_train, MF_test,
     RF_train, RF_test,
     MM_train, MM_test,
     RM_train, RM_test,
     MD_train, MD_test,
     RD_train, RD_test,
     T_train, T_test) = cross_validation.train_test_split(
        MapFeature_list, RedFeature_list,
        MapMean_list, RedMean_list,
        MapDev_list, RedDev_list,
        Target_list,
        test_size=0.3, random_state=i)

    # Parenthesised form prints the same text under Python 2 and Python 3.
    print('start')

    # Fit the four intermediate models, then the final one, on the training
    # portion only.
    engine = Build_Model.WhatIf_Engine(
        MF_train, RF_train, MM_train, RM_train, MD_train, RD_train, T_train)
    engine.Build_MapMean_Model()
    engine.Build_MapDev_Model()
    engine.Build_RedMean_Model()
    engine.Build_RedDev_Model()
    engine.Build_Final_Model()

    # NOTE(review): `engine` is not handed to Testing.test; presumably the
    # built models are shared through some other channel -- verify.
    predict_result = Testing.test(MF_test, RF_test)
# Univariate scoring of all features against the reducer mean series of the
# 'TeraSort' logs.
import json
import commands
import preprocess
import numpy as np
import logs

new_model = logs.logs_object("TeraSort")
target_list = np.array(new_model.get_target_list())
feature_list = np.array(new_model.get_AllFeature_list())
interData_list = np.array(new_model.get_RedMean_list())

import pylab as pl
from sklearn.feature_selection import SelectPercentile, f_regression
from sklearn import svm

# Start from an empty figure number 1.
pl.figure(1)
pl.clf()

# percentile=100 keeps every feature; the selector is used here only for the
# per-feature p-values computed by f_regression.
selector = SelectPercentile(f_regression, percentile=100).fit(
    feature_list, interData_list)

# Turn p-values into scores (smaller p-value -> larger score), then rescale
# so that the largest score equals 1.
scores = -np.log10(selector.pvalues_)
scores = scores / scores.max()
# Repeated hold-out evaluation of the what-if engine on the 'pairs-co-occur'
# logs.
import Build_Model
import Testing
import logs
from sklearn import cross_validation
import numpy as np

# Pull every series out of the log object as a NumPy array so that all of
# them can be split row-wise in a single train_test_split call.
new_model = logs.logs_object('pairs-co-occur')
MapFeature_list = np.array(new_model.get_MapFeature_list())
RedFeature_list = np.array(new_model.get_RedFeature_list())
MapMean_list = np.array(new_model.get_MapMean_list())
RedMean_list = np.array(new_model.get_RedMean_list())
MapDev_list = np.array(new_model.get_MapDev_list())
RedDev_list = np.array(new_model.get_RedDev_list())
Target_list = np.array(new_model.get_target_list())

# NOTE(review): the file's indentation was lost; the whole split/train/test
# sequence is assumed to belong to the loop body -- confirm.
for i in range(10):
    # Seed each repetition with its own index. A constant seed reproduces the
    # very same 70/30 split on every pass, which makes the ten repetitions
    # redundant; seeding with `i` varies the split while keeping the whole
    # run reproducible.
    (MF_train, MF_test,
     RF_train, RF_test,
     MM_train, MM_test,
     RM_train, RM_test,
     MD_train, MD_test,
     RD_train, RD_test,
     T_train, T_test) = cross_validation.train_test_split(
        MapFeature_list, RedFeature_list,
        MapMean_list, RedMean_list,
        MapDev_list, RedDev_list,
        Target_list,
        test_size=0.3, random_state=i)

    # Fit the four intermediate models, then the final one, on the training
    # portion only.
    engine = Build_Model.WhatIf_Engine(
        MF_train, RF_train, MM_train, RM_train, MD_train, RD_train, T_train)
    engine.Build_MapMean_Model()
    engine.Build_MapDev_Model()
    engine.Build_RedMean_Model()
    engine.Build_RedDev_Model()
    engine.Build_Final_Model()

    # NOTE(review): `engine` is not handed to Testing.test; presumably the
    # built models are shared through some other channel -- verify.
    predict_result = Testing.test(MF_test, RF_test)
# Feature standardisation helper, plus the reducer feature list of the
# 'TeraSort' logs loaded at import time.
import logs
import numpy as np
from sklearn import preprocessing

raw_logs_object = logs.logs_object('TeraSort')
feature_list = raw_logs_object.get_RedFeature_list()


def PreProcess(feature_list):
    """Return ``feature_list`` passed through ``sklearn.preprocessing.scale``.

    The parameter shadows the module-level ``feature_list``; only the value
    given by the caller is scaled. ``scale`` is called with its defaults.
    """
    scaled = preprocessing.scale(feature_list)
    return scaled
# Repeated hold-out evaluation of the what-if engine on the 'word count' logs.
import Build_Model
import Testing
import logs
from sklearn import cross_validation
import numpy as np

# Pull every series out of the log object as a NumPy array so that all of
# them can be split row-wise in a single train_test_split call.
new_model = logs.logs_object('word count')
MapFeature_list = np.array(new_model.get_MapFeature_list())
RedFeature_list = np.array(new_model.get_RedFeature_list())
MapMean_list = np.array(new_model.get_MapMean_list())
RedMean_list = np.array(new_model.get_RedMean_list())
MapDev_list = np.array(new_model.get_MapDev_list())
RedDev_list = np.array(new_model.get_RedDev_list())
Target_list = np.array(new_model.get_target_list())

# NOTE(review): the file's indentation was lost; the whole split/train/test
# sequence is assumed to belong to the loop body -- confirm.
for i in range(10):
    # Seed each repetition with its own index. A constant seed reproduces the
    # very same 70/30 split on every pass, which makes the ten repetitions
    # redundant; seeding with `i` varies the split while keeping the whole
    # run reproducible.
    (MF_train, MF_test,
     RF_train, RF_test,
     MM_train, MM_test,
     RM_train, RM_test,
     MD_train, MD_test,
     RD_train, RD_test,
     T_train, T_test) = cross_validation.train_test_split(
        MapFeature_list, RedFeature_list,
        MapMean_list, RedMean_list,
        MapDev_list, RedDev_list,
        Target_list,
        test_size=0.3, random_state=i)

    # Fit the four intermediate models, then the final one, on the training
    # portion only.
    engine = Build_Model.WhatIf_Engine(
        MF_train, RF_train, MM_train, RM_train, MD_train, RD_train, T_train)
    engine.Build_MapMean_Model()
    engine.Build_MapDev_Model()
    engine.Build_RedMean_Model()
    engine.Build_RedDev_Model()
    engine.Build_Final_Model()

    # NOTE(review): `engine` is not handed to Testing.test; presumably the
    # built models are shared through some other channel -- verify.
    predict_result = Testing.test(MF_test, RF_test)
# Repeated hold-out evaluation of the what-if engine on the 'TeraSort' logs,
# with the log object built from the extra 'terasort_model' argument.
import Build_Model
import Testing
import logs
from sklearn import cross_validation
import numpy as np

# Pull every series out of the log object as a NumPy array so that all of
# them can be split row-wise in a single train_test_split call.
# NOTE(review): the meaning of the second logs_object argument is not visible
# here -- presumably a model/storage name; verify against `logs`.
new_model = logs.logs_object('TeraSort', 'terasort_model')
MapFeature_list = np.array(new_model.get_MapFeature_list())
RedFeature_list = np.array(new_model.get_RedFeature_list())
MapMean_list = np.array(new_model.get_MapMean_list())
RedMean_list = np.array(new_model.get_RedMean_list())
MapDev_list = np.array(new_model.get_MapDev_list())
RedDev_list = np.array(new_model.get_RedDev_list())
Target_list = np.array(new_model.get_target_list())

# NOTE(review): the file's indentation was lost; the whole split/train/test
# sequence is assumed to belong to the loop body -- confirm.
for i in range(10):
    # Seed each repetition with its own index. A constant seed reproduces the
    # very same 70/30 split on every pass, which makes the ten repetitions
    # redundant; seeding with `i` varies the split while keeping the whole
    # run reproducible.
    (MF_train, MF_test,
     RF_train, RF_test,
     MM_train, MM_test,
     RM_train, RM_test,
     MD_train, MD_test,
     RD_train, RD_test,
     T_train, T_test) = cross_validation.train_test_split(
        MapFeature_list, RedFeature_list,
        MapMean_list, RedMean_list,
        MapDev_list, RedDev_list,
        Target_list,
        test_size=0.3, random_state=i)

    # Fit the four intermediate models, then the final one, on the training
    # portion only.
    engine = Build_Model.WhatIf_Engine(
        MF_train, RF_train, MM_train, RM_train, MD_train, RD_train, T_train)
    engine.Build_MapMean_Model()
    engine.Build_MapDev_Model()
    engine.Build_RedMean_Model()
    engine.Build_RedDev_Model()
    engine.Build_Final_Model()

    # NOTE(review): `engine` is not handed to Testing.test; presumably the
    # built models are shared through some other channel -- verify.
    predict_result = Testing.test(MF_test, RF_test)