def run_demo_with_sampling(self, dts_name, model_data, features):
    '''
    Runs the demo with one model per value in self.valores_C; the models
    are identical except for the C parameter of the linear SVC.

    Parameters
    ----------
    dts_name : str
    model_data : dict
        Data needed to generate a model. Required keys:
        ['model_name', 'sampler_name', 'pca_bool', 'n_estim', 'box_type']
    features : list of int
        A list with real features to test with; -1 is not a valid value.

    Returns
    -------
    (train, test) : tuple of list of dict
        The scores for many models, identical in everything except the C
        value of the linear SVC.
    '''
    train_dicts = []
    test_dicts = []
    dataset = get_data(dts_name, n_ins=self.n_ins)
    for C in self.valores_C:
        model = get_model(C=C, **model_data)
        train_score, test_score = get_sampling_model_scores(
            model, dataset, features)
        train_score['label'] = 'C = {}'.format(C)
        test_score['label'] = 'C = {}'.format(C)
        train_dicts.append(train_score)
        test_dicts.append(test_score)

    return train_dicts, test_dicts
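# Hypothetical usage sketch (not from the original source): assumes an
# instance `demo` whose `valores_C` and `n_ins` attributes are already set,
# and a dataset name that get_data recognises. Each returned dict has the
# keys ['absi', 'ord', 'label'].
#
#     train, test = demo.run_demo_with_sampling(
#         dts_name='segment',                      # assumed dataset name
#         model_data={'model_name': 'linear_svc',
#                     'sampler_name': 'rbfsampler',
#                     'pca_bool': False,
#                     'n_estim': None,
#                     'box_type': 'none'},
#         features=[100, 200, 300])
#     for curve in test:
#         print(curve['label'], curve['ord'])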
def run_demo_with_sampling(self, dts_name, dts_size, model_data, features):
    '''
    Gets the scores of two models (as specified in model_data) that differ
    only in the order of the pipeline: one applies the sampler first, the
    other applies PCA first.

    Parameters
    ----------
    dts_name : str
    dts_size : int
    model_data : dict
        Data needed to generate a model. Required keys:
        ['model_name', 'sampler_name', 'n_estim', 'box_type']
    features : list of int
        A list with real features to test with; -1 is not a valid value.

    Returns
    -------
    (train, test) : tuple of list of dict
        The scores of the two models: one sampler-first, the other
        PCA-first.
        Keys of dict: ['absi', 'ord', 'label']
    '''
    dataset = get_data(dts_name, n_ins=dts_size)

    # get_model_first_pca no longer exists; the generic get_model takes a
    # pca_first flag that indicates the order of the pipeline.
    model_first_sampler = get_model(pca_bool=True, pca_first=False,
                                    **model_data)
    model_first_pca = get_model(pca_bool=True, pca_first=True, **model_data)

    first_sampler_train_score, first_sampler_test_score =\
        get_sampling_model_scores(model_first_sampler, dataset, features)
    first_sampler_train_score['label'] = 'Sampler >> PCA'
    first_sampler_test_score['label'] = 'Sampler >> PCA'

    first_pca_train_score, first_pca_test_score =\
        get_sampling_model_scores(model_first_pca, dataset, features)
    first_pca_train_score['label'] = 'PCA >> Sampler'
    first_pca_test_score['label'] = 'PCA >> Sampler'

    train_dicts = [first_sampler_train_score, first_pca_train_score]
    test_dicts = [first_sampler_test_score, first_pca_test_score]
    return train_dicts, test_dicts
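# Minimal sketch of the two pipeline orders being compared. This is an
# assumption about what get_model builds internally, written with plain
# scikit-learn components:
#
#     from sklearn.pipeline import Pipeline
#     from sklearn.decomposition import PCA
#     from sklearn.kernel_approximation import RBFSampler
#     from sklearn.svm import LinearSVC
#
#     sampler_first = Pipeline([('sampler', RBFSampler()),
#                               ('pca', PCA()),
#                               ('clf', LinearSVC())])   # 'Sampler >> PCA'
#     pca_first = Pipeline([('pca', PCA()),
#                           ('sampler', RBFSampler()),
#                           ('clf', LinearSVC())])       # 'PCA >> Sampler'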
def run_demo_with_sampling(self, model, features):
    '''
    Gets the scores of a single model on every dataset in
    self.all_datasets_names, producing one score curve per dataset.

    Parameters
    ----------
    model : estimator, as returned by get_model
    features : list of int
        A list with real features to test with; -1 is not a valid value.

    Returns
    -------
    (train, test) : tuple of list of dict
        One entry per dataset. Keys of dict: ['absi', 'ord', 'label']
    '''
    train_dicts = []
    test_dicts = []
    for dts_name in self.all_datasets_names:
        dataset = get_data(dts_name)
        train_score, test_score = get_sampling_model_scores(
            model, dataset, features)
        train_score['label'] = dts_name
        test_score['label'] = dts_name
        train_dicts.append(train_score)
        test_dicts.append(test_score)

    return train_dicts, test_dicts
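# Shape of each score dict, inferred from how the dicts are built and
# labelled elsewhere in this module: 'absi' holds the x-axis values (number
# of sampled features), 'ord' the matching scores, 'label' the legend entry.
# Values below are illustrative only:
#
#     train_score = {'absi': [100, 200, 300],
#                    'ord': [0.81, 0.84, 0.86],
#                    'label': 'segment'}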
def run_demo_with_sampling(self, dts_name, dts_size, model_data, hparams,
                           features):
    '''
    Gets the scores of many models, all identical (as specified in
    model_data) except for the gamma value of the sampler, using one value
    of self.gammas per model. A sampler-less baseline ('identity') is run
    as well.

    Parameters
    ----------
    dts_name : str
    dts_size : int
    model_data : dict
        Data needed to generate a model. Required keys:
        ['model_name', 'sampler_name', 'pca_bool', 'n_estim', 'box_type']
    hparams : dict
        Hyper-parameters, as expected by self.get_hparams
    features : list of int
        A list with real features to test with; -1 is not a valid value.

    Returns
    -------
    None
        The scores, which only disagree in the gamma value, are stored in
        self.train_scores and self.test_scores.
        Keys of each dict: ['absi', 'ord', 'label']
    '''
    train_dicts = []
    test_dicts = []
    dataset = get_data(dts_name, n_ins=dts_size)
    model_params = self.get_hparams(model_data['model_name'], hparams)
    for g in self.gammas:
        model = get_model(rbfsampler_gamma=g, nystroem_gamma=g,
                          model_params=model_params, **model_data)
        train_score, test_score, errors =\
            get_sampling_model_scores(model, dataset, features)
        train_score['label'] = 'gamma {}'.format(g)
        test_score['label'] = 'gamma {}'.format(g)
        train_dicts.append(train_score)
        test_dicts.append(test_score)

    # Also run the case with no sampler at all; the gamma arguments are
    # ignored by the 'identity' sampler.
    m_data = dict(model_data)
    m_data['sampler_name'] = 'identity'
    model = get_model(rbfsampler_gamma=g, nystroem_gamma=g,
                      model_params=model_params, **m_data)
    tr_score, te_score = get_non_sampling_model_scores(model, dataset)
    # The single scalar score is drawn as a horizontal line spanning the
    # whole features range.
    train_score = {
        'absi': [features[0], features[-1]],
        'ord': [tr_score, tr_score],
        'label': 'No sampler'
    }
    test_score = {
        'absi': [features[0], features[-1]],
        'ord': [te_score, te_score],
        'label': 'No sampler'
    }
    train_dicts.append(train_score)
    test_dicts.append(test_score)

    self.train_scores = train_dicts
    self.test_scores = test_dicts
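# Minimal sketch of what the gamma sweep varies. This is an assumption
# about get_model internals, shown with plain scikit-learn samplers; the
# gamma values and n_components are illustrative:
#
#     from sklearn.kernel_approximation import RBFSampler, Nystroem
#
#     for g in (0.01, 0.1, 1.0):
#         rff = RBFSampler(gamma=g, n_components=100)
#         nys = Nystroem(gamma=g, n_components=100)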
def run_demo(self, dts_name, dts_size, features_range, models):
    '''
    Reads the arguments and returns a pair (train, test) of lists of
    dictionaries with the scores of the demo.

    Parameters
    ----------
    dts_name : str
    dts_size : int
    features_range : list of int
        shape: [2], increasing order
    models : list of dict
        Each dict is a model. Required keys:
        ['model_name', 'sampler_name', 'box_type', 'n_estim', 'pca']
        Values of 'sampler_name' and 'box_type' are str or None

    Returns
    -------
    (train_scores, test_scores) : tuple of list of dict
        Dicts with keys ['absi', 'ord', 'label']
    '''
    info_run = '''
- Dataset: **{0}**
- Size: **{1}**
'''
    self.run_specific = info_run.format(dts_name, dts_size)
    dataset = get_data(dts_name, n_ins=dts_size)
    train_scores = []
    test_scores = []
    for m in models:
        model_name = m['model_name']
        sampler_name = m['sampler_name']
        box_type = m['box_type']
        n_estim = m['n_estim']
        pca = m['pca']
        if box_type == 'none':
            n_estim = None
        clf = get_model(model_name=model_name,
                        sampler_name=sampler_name,
                        pca_bool=pca,
                        n_estim=n_estim,
                        box_type=box_type)
        # Test at most n_splits_features points of the features range.
        n_splits_features = 30
        features = list(range(*features_range))
        if (features_range[1] - features_range[0]) > n_splits_features:
            features = np.linspace(*features_range,
                                   num=n_splits_features,
                                   dtype=int).tolist()
        if sampler_name == 'identity':
            # train_score and test_score are floats; the single scalar
            # score is drawn as a horizontal line across the range.
            train_score, test_score =\
                get_non_sampling_model_scores(clf, dataset)
            lab = self.get_label(model_name, sampler_name, box_type,
                                 n_estim, pca)
            train_score = {
                'absi': features_range,
                'ord': [train_score, train_score],
                'label': lab,
            }
            test_score = {
                'absi': features_range,
                'ord': [test_score, test_score],
                'label': lab,
            }
        else:
            # train_score and test_score are dicts
            train_score, test_score =\
                get_sampling_model_scores(clf, dataset, features)
            lab = self.get_label(model_name, sampler_name, box_type,
                                 n_estim, pca)
            train_score['label'] = lab
            test_score['label'] = lab

        train_scores.append(train_score)
        test_scores.append(test_score)

    return train_scores, test_scores
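# The feature grid above caps the sweep at 30 points. A standalone check of
# that logic (the range values are illustrative):
#
#     import numpy as np
#     features_range = [10, 400]
#     features = np.linspace(*features_range, num=30, dtype=int).tolist()
#     # -> 30 roughly evenly spaced feature counts between 10 and 400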
def run_demo(self, dts_name, dts_size, features_range, models, hparams,
             rbfsampler_gamma, nystroem_gamma):
    '''
    First clears self.train_scores and self.test_scores, then runs the
    demo, appending to them the results of each of the models. The results
    are dictionaries with keys ['absi', 'ord', 'label'].

    It is resilient to failures of individual models. When one fails, a
    warning is raised, self.ERRORS is extended with some information, and
    execution continues with the rest of the models.

    Parameters
    ----------
    dts_name : str
    dts_size : int
    features_range : list of int
        shape: [2], increasing order
    models : list of dict
        Each dict is a model. Required keys:
        ['model_name', 'sampler_name', 'box_type', 'n_estim', 'pca',
         'pca_first']
        Values of 'sampler_name' and 'box_type' are str or None
    hparams : dict
        Required keys: ['dt', 'logit', 'linear_svc']
    rbfsampler_gamma : float
    nystroem_gamma : float

    Returns
    -------
    None
    '''
    info_run = {
        'Dataset': dts_name,
        'Size': dts_size,
        'RFF gamma': rbfsampler_gamma,
        'Nystroem gamma': nystroem_gamma,
        'DT max. depth': hparams['dt']['max_depth'],
        'DT min. samples split': hparams['dt']['min_samples_split'],
        'DT min. samples leaf': hparams['dt']['min_samples_leaf'],
        'DT min. weight fraction leaf':
            hparams['dt']['min_weight_fraction_leaf'],
        'DT max. leaf nodes': hparams['dt']['max_leaf_nodes'],
        'DT min. impurity decrease':
            hparams['dt']['min_impurity_decrease'],
        'Logit C': hparams['logit']['C'],
        'Linear SVC C': hparams['linear_svc']['C'],
    }
    self.run_specific = self.get_run_specific_widget(info_run)
    dataset = get_data(dts_name, n_ins=dts_size)
    self.train_scores.clear()
    self.test_scores.clear()
    for m in models:
        model_name = m['model_name']
        sampler_name = m['sampler_name']
        box_type = m['box_type']
        n_estim = m['n_estim']
        pca = m['pca']
        pca_first = m['pca_first']
        model_params = self.get_hparams(model_name, hparams)
        if box_type == 'none':
            n_estim = None
        clf = get_model(model_name=model_name,
                        model_params=model_params,
                        sampler_name=sampler_name,
                        pca_bool=pca,
                        pca_first=pca_first,
                        n_estim=n_estim,
                        box_type=box_type,
                        rbfsampler_gamma=rbfsampler_gamma,
                        nystroem_gamma=nystroem_gamma)
        # Test at most n_splits_features points of the features range.
        n_splits_features = 30
        features = list(range(*features_range))
        if (features_range[1] - features_range[0]) > n_splits_features:
            features = np.linspace(*features_range,
                                   num=n_splits_features,
                                   dtype=int).tolist()
        if sampler_name == 'identity':
            # train_score and test_score are floats; the single scalar
            # score is drawn as a horizontal line across the range.
            train_score, test_score =\
                get_non_sampling_model_scores(clf, dataset)
            lab = self.get_label(model_name, sampler_name, box_type,
                                 n_estim, pca, pca_first)
            train_score = {
                'absi': features_range,
                'ord': [train_score, train_score],
                'label': lab,
            }
            test_score = {
                'absi': features_range,
                'ord': [test_score, test_score],
                'label': lab,
            }
        else:
            # train_score and test_score are dicts
            train_score, test_score, errors =\
                get_sampling_model_scores(clf, dataset, features)
            self.ERRORS.extend(errors)
            lab = self.get_label(model_name, sampler_name, box_type,
                                 n_estim, pca, pca_first)
            train_score['label'] = lab
            test_score['label'] = lab

        self.train_scores.append(train_score)
        self.test_scores.append(test_score)
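# Shape of the hparams dict read above, inferred from the accesses in
# run_demo. The values shown are illustrative (scikit-learn decision-tree
# and C defaults), not taken from the original source:
#
#     hparams = {
#         'dt': {
#             'max_depth': None,
#             'min_samples_split': 2,
#             'min_samples_leaf': 1,
#             'min_weight_fraction_leaf': 0.0,
#             'max_leaf_nodes': None,
#             'min_impurity_decrease': 0.0,
#         },
#         'logit': {'C': 1.0},
#         'linear_svc': {'C': 1.0},
#     }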