def split_data(X, y, tr_size, rnd_state, name, w_train, w_test):
    """Split features and targets into a train and a test ``Data`` object.

    Args:
        X: Feature array handed to ``train_test_split``.
        y: Target array handed to ``train_test_split``.
        tr_size: Forwarded as ``train_size``.
        rnd_state: Forwarded as ``random_state``.
        name: Base name; ``"_trn"`` / ``"_tst"`` is appended for the
            train / test object respectively.
        w_train: Fourth argument of the train ``Data`` object
            (presumably a sample weight -- confirm against ``Data``).
        w_test: Fourth argument of the test ``Data`` object.

    Returns:
        A ``(train, test)`` tuple of ``Data`` objects.
    """
    parts = train_test_split(X, y, train_size=tr_size, random_state=rnd_state)
    # train_test_split yields the pieces in the order X-train, X-test,
    # y-train, y-test.
    features_train, features_test, targets_train, targets_test = parts

    train_set = Data(features_train, targets_train, name + "_trn", w_train)
    test_set = Data(features_test, targets_test, name + "_tst", w_test)
    return train_set, test_set
def k3validplot(dt_dev, opts): print "Running k3 validation plots" # Create BDT bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=opts.maxdepth), algorithm='SAMME', n_estimators=opts.ntrees, learning_rate=opts.lrate) # Generate three equal datasets X1, X_temp, y1, y_temp = cross_validation.train_test_split(dt_dev.data, dt_dev.targets, train_size = 0.33, random_state = 10293845) X2, X3, y2, y3 = cross_validation.train_test_split(X_temp, y_temp, train_size = 0.5, random_state = 56478392) # make data combinations def combine(d1,d2,t1,t2,name,sf): return Data(np.concatenate((d1,d2),axis=0), np.concatenate((t1,t2),axis=0), name, sf) X12 = combine(X1,X2,y1,y2,"k1train",1) X13 = combine(X1,X3,y1,y3,"k2train",1) X23 = combine(X2,X3,y2,y3,"k3train",1) X1 = Data(X1, y1, "k1", 1) X2 = Data(X2, y2, "k2", 1) X3 = Data(X3, y3, "k3", 1) # Test on 3 dt = time.time() print "k1 started...\t", time.time() bdt.fit(X12.getDataNoWeight(), X12.targets) test_train_compare(bdt, X12, X3, "plots/validation/k1train_ztravelRemoved.png") # Test on 2 print "k2 started...\t", time.time(), "time diff", time.time()-dt bdt.fit(X13.getDataNoWeight(), X13.targets) test_train_compare(bdt, X13, X2, "plots/validation/k2train_ztravelRemoved.png") # Test on 1 print "k3 started...\t", time.time(), "time diff", time.time()-dt bdt.fit(X23.getDataNoWeight(), X23.targets) test_train_compare(bdt, X23, X1, "plots/validation/k3train_ztravelRemoved.png") print "End time: ", time.time(), "total run time: ", dt - time.time()
def __init__(self, parent=None, positionsize=None, new=False):
    """Set up the frame, its optional geometry, and its data/param objects.

    Args:
        parent: Forwarded to the base-class constructor.
        positionsize: Optional indexable with at least four items. Items
            0-3 are passed, in order, to ``QtCore.QRect`` (x, y, width,
            height in Qt's constructor). ``None`` leaves the geometry
            untouched.
        new: Forwarded to ``Data``.
    """
    super().__init__(parent)

    if positionsize is not None:
        rect = QtCore.QRect(
            positionsize[0],
            positionsize[1],
            positionsize[2],
            positionsize[3],
        )
        self.setGeometry(rect)

    # Raised window-panel look for the frame border.
    self.setFrameShape(QFrame.WinPanel)
    self.setFrameShadow(QFrame.Raised)

    self.data = Data(new)
    self.param = Screenlabel()
# Load NuGen, selected with the signal cut.
print("Loading NuGen...")
dt_nugen = ReadData(f_nugen, m_sname_E2, opts.sigcut)

# Load low-energy NuGen: same file and name, with "!" prefixed to the cut
# (presumably the negated selection -- confirm against ReadData).
print("Loading NuGen LE...")
dt_nugenLE = ReadData(f_nugen, m_sname_E2, "!" + opts.sigcut)

# Load Corsika and low-energy Corsika, without any cut string.
print("Loading Corsika...")
dt_corsika = ReadData(f_corsika, m_sname_corsika, "")
dt_corsikaLE = ReadData(f_corsikaLE, m_sname_corsikaLE, "")

# Merge both Corsika samples row-wise into a single dataset, replacing
# dt_corsika.
merged_corsika_data = np.concatenate(
    (dt_corsika.data, dt_corsikaLE.data), axis=0)
merged_corsika_targets = np.concatenate(
    (dt_corsika.targets, dt_corsikaLE.targets), axis=0)
dt_corsika = Data(merged_corsika_data, merged_corsika_targets,
                  "totalCorsika", 1)

print("Loading data...")
dt_data = ReadData(f_data, m_sname_data, "")
#dt_data = None
#print dt_nugen.data
#print dt_nugenLE.data
#print dt_corsika.data

# Total sample = NuGen + merged Corsika (the low-energy NuGen sample is not
# included here).
total_data = np.concatenate((dt_nugen.data, dt_corsika.data), axis=0)
total_targets = np.concatenate((dt_nugen.targets, dt_corsika.targets), axis=0)
dt_total = Data(total_data, total_targets, "total", 1)

print("Saving...")
def combine(Xsig, Xbkg, ysig, ybkg, name, sf):
    """Stack signal and background samples into one ``Data`` object.

    Args:
        Xsig: Signal features.
        Xbkg: Background features, appended after ``Xsig`` along axis 0.
        ysig: Signal targets.
        ybkg: Background targets, appended after ``ysig`` along axis 0.
        name: Passed through to ``Data``.
        sf: Passed through to ``Data`` as its fourth argument.

    Returns:
        The ``Data`` object built from the concatenated arrays.
    """
    features = np.concatenate((Xsig, Xbkg), axis=0)
    labels = np.concatenate((ysig, ybkg), axis=0)
    return Data(features, labels, name, sf)
def combine(d1, d2, t1, t2, name, sf):
    """Join two datasets row-wise and wrap the result in ``Data``.

    ``d1``/``d2`` are concatenated along axis 0, as are ``t1``/``t2``;
    ``name`` and ``sf`` are forwarded to ``Data`` unchanged.
    """
    # Data pair first, target pair second -- same evaluation order as
    # building the two arrays inline.
    joined_data, joined_targets = (
        np.concatenate(pair, axis=0) for pair in ((d1, d2), (t1, t2))
    )
    return Data(joined_data, joined_targets, name, sf)