def fit_one(self, data, model_y, model_stereo):
    """Reconstruct tracks for one dataset and return the efficiency (percent).

    If ``self.train_size`` is set, events are split into train/test; the
    train events are used to reconstruct tracks and fit a track-combination
    classifier, which is then applied when scoring the test events.
    Otherwise all events are scored without a classifier.

    Parameters
    ----------
    data : table-like with an ``EventID`` column (pandas-style access is used).
    model_y : model used for track reconstruction in the y projection.
    model_stereo : model used for reconstruction in the stereo views.

    Returns
    -------
    float
        Mean per-event efficiency over the test events, in percent.
    """
    event_ids = numpy.unique(data.EventID.values)

    # NOTE(review): `is not None` replaces the original `!= None` (PEP 8);
    # behavior is identical for the expected float-or-None attribute.
    if self.train_size is not None:
        event_ids_train, event_ids_test = train_test_split(
            event_ids, train_size=self.train_size, random_state=42)
    else:
        event_ids_test = event_ids

    if self.train_size is not None:
        # Reconstruct tracks for the training events in parallel.
        # `with` closes and joins the pool — the original leaked worker
        # processes by never calling close()/join().
        n_train = len(event_ids_train)
        with Pool(self.processes) as p:
            results_train = p.map(
                tracks_reconstruction,
                zip(event_ids_train,
                    [data] * n_train,
                    [model_y] * n_train,
                    [model_stereo] * n_train))
        tracks_train = merge_dicts(results_train)

        # Train the track-combination classifier on the train tracks.
        sc = SuperCombinator()
        combination_data = sc.data_collection(tracks_train, data)
        X_data = combination_data[combination_data.columns[:-1]].values
        y_data = combination_data.label.values

        xgb_base = XGBoostClassifier(n_estimators=1000, colsample=0.7,
                                     eta=0.01, nthreads=1,
                                     subsample=0.7, max_depth=8)
        folding = FoldingClassifier(xgb_base, n_folds=10, random_state=11)
        folding.fit(X_data, y_data)
        # Use a single fold's estimator for scoring, as in the original.
        clf = folding.estimators[0]
    else:
        clf = None

    # Reconstruct tracks for the test events in parallel.
    n_test = len(event_ids_test)
    with Pool(self.processes) as p:
        results_test = p.map(
            tracks_reconstruction,
            zip(event_ids_test,
                [data] * n_test,
                [model_y] * n_test,
                [model_stereo] * n_test))
    tracks_test = merge_dicts(results_test)

    # Per-event efficiencies, again computed in parallel.
    with Pool(self.processes) as p:
        effs = p.map(
            get_eff_value,
            zip(event_ids_test,
                [data] * n_test,
                [tracks_test] * n_test,
                [clf] * n_test))

    eff = 100. * numpy.array(effs).sum() / len(effs)
    return eff
def fit_one(self, data, model_y, model_stereo):
    """Reconstruct tracks for one dataset and return the efficiency (percent).

    When ``self.train_size`` is set, the event IDs are split into a train
    and a test partition: the train partition is used to reconstruct tracks
    and fit a track-combination classifier, which is then passed to the
    per-event efficiency evaluation on the test partition. With no
    ``train_size``, all events are evaluated without a classifier.

    Parameters
    ----------
    data : table-like with an ``EventID`` column (pandas-style access is used).
    model_y : model used for track reconstruction in the y projection.
    model_stereo : model used for reconstruction in the stereo views.

    Returns
    -------
    float
        Mean per-event efficiency over the test events, in percent.
    """
    event_ids = numpy.unique(data.EventID.values)

    # PEP 8: compare against None with `is not None`, not `!= None`.
    has_train = self.train_size is not None
    if has_train:
        event_ids_train, event_ids_test = train_test_split(
            event_ids, train_size=self.train_size, random_state=42)
    else:
        event_ids_test = event_ids

    if has_train:
        # Parallel track reconstruction on the train events. The context
        # manager closes/joins the pool (the original never released it).
        m = len(event_ids_train)
        with Pool(self.processes) as pool:
            results_train = pool.map(
                tracks_reconstruction,
                zip(event_ids_train, [data] * m,
                    [model_y] * m, [model_stereo] * m))
        tracks_train = merge_dicts(results_train)

        # Fit the track-combination classifier on the reconstructed tracks.
        sc = SuperCombinator()
        combination_data = sc.data_collection(tracks_train, data)
        X_data = combination_data[combination_data.columns[:-1]].values
        y_data = combination_data.label.values

        xgb_base = XGBoostClassifier(n_estimators=1000, colsample=0.7,
                                     eta=0.01, nthreads=1,
                                     subsample=0.7, max_depth=8)
        folding = FoldingClassifier(xgb_base, n_folds=10, random_state=11)
        folding.fit(X_data, y_data)
        # A single fold estimator is used for scoring, as in the original.
        clf = folding.estimators[0]
    else:
        clf = None

    # Parallel track reconstruction on the test events.
    m = len(event_ids_test)
    with Pool(self.processes) as pool:
        results_test = pool.map(
            tracks_reconstruction,
            zip(event_ids_test, [data] * m,
                [model_y] * m, [model_stereo] * m))
    tracks_test = merge_dicts(results_test)

    # Per-event efficiency values, computed in parallel.
    with Pool(self.processes) as pool:
        effs = pool.map(
            get_eff_value,
            zip(event_ids_test, [data] * m,
                [tracks_test] * m, [clf] * m))

    return 100. * numpy.array(effs).sum() / len(effs)