Esempi in Python per get_data, esempi in Python per demo_utils.general.get_data

Esempio n. 1

0

Mostra file

File: demo4.py Progetto: ribes96/TFG

    def run_demo_with_sampling(self, dts_name, model_data, features):
        '''
        Score one sampling model per value in self.valores_C on one dataset.

        Parameters
        ----------
        dts_name : str
            Dataset name, passed to get_data together with self.n_ins.
        model_data : dict
            Keyword arguments forwarded to get_model next to C.
            Required keys: ['model_name', 'sampler_name', 'pca_bool',
            'n_estim', 'box_type']
        features : list of int
            Forwarded to get_sampling_model_scores.
            Only real values are allowed (-1 is not valid)

        Returns
        -------
        (train, test) : tuple of list of dict
            One score dict per value of C, in the order of self.valores_C.
            Each dict gets a 'label' entry of the form 'C = <value>'.
        '''
        # The dataset is loaded once and shared by every value of C.
        dataset = get_data(dts_name, n_ins=self.n_ins)
        train_dicts, test_dicts = [], []

        for c_value in self.valores_C:
            train_score, test_score = get_sampling_model_scores(
                get_model(C=c_value, **model_data), dataset, features)

            label = 'C = {}'.format(c_value)
            for score in (train_score, test_score):
                score['label'] = label

            train_dicts.append(train_score)
            test_dicts.append(test_score)

        return train_dicts, test_dicts

Esempio n. 2

0

Mostra file

    def run_demo_with_sampling(self, model, features):
        '''
        Score the same sampling model on every dataset the demo knows.

        Parameters
        ----------
        model : object
            Forwarded unchanged to get_sampling_model_scores.
        features : list of int
            Forwarded unchanged to get_sampling_model_scores.

        Returns
        -------
        (train, test) : tuple of list of dict
            One score dict per name in self.all_datasets_names, in that
            order. Each dict gets the dataset name under the 'label' key.
        '''
        train_dicts, test_dicts = [], []

        for name in self.all_datasets_names:
            train_score, test_score = get_sampling_model_scores(
                model, get_data(name), features)

            # Both curves of a dataset share the same legend entry.
            for score in (train_score, test_score):
                score['label'] = name

            train_dicts.append(train_score)
            test_dicts.append(test_score)

        return train_dicts, test_dicts

Esempio n. 3

0

Mostra file

    def run_demo_with_sampling(self, dts_name, dts_size, model_data, features):
        '''
        Gets the score of two models, (as specified in model_data)
        but one using first PCA and the other using first sampler

        Parameters
        ----------
        dts_name : str
            Dataset name, passed to get_data.
        dts_size : int
            Passed to get_data as n_ins.
        model_data : dict
            Data needed to generate a model.
            Required keys: ['model_name', 'sampler_name', 'n_estim',
            'box_type']
            Must not contain 'pca_bool' or 'pca_first': both are set here.
        features : list of int
            A list with real features to test with. Values of -1 are not
            allowed

        Returns
        -------
        (train, test) : tuple of list of dict
            The scores for two models specified, one first sampler, the other
            first pca (in that order inside each list).
            Keys of dict: ['absi', 'ord', 'label']
        '''
        dataset = get_data(dts_name, n_ins=dts_size)

        # The dedicated get_model_first_pca helper no longer exists; the
        # order of PCA and sampler is now chosen through the generic
        # get_model.
        # NOTE(review): 'pca_first' is the keyword other get_model callers
        # pass for this purpose - confirm it matches get_model's signature.
        model_first_sampler = get_model(pca_bool=True, pca_first=False,
                                        **model_data)
        model_first_pca = get_model(pca_bool=True, pca_first=True,
                                    **model_data)

        first_sampler_train_score, first_sampler_test_score =\
            get_sampling_model_scores(model_first_sampler, dataset, features)
        first_sampler_train_score['label'] = 'Sampler >> PCA'
        first_sampler_test_score['label'] = 'Sampler >> PCA'

        first_pca_train_score, first_pca_test_score =\
            get_sampling_model_scores(model_first_pca, dataset, features)
        first_pca_train_score['label'] = 'PCA >> Sampler'
        first_pca_test_score['label'] = 'PCA >> Sampler'

        train_dicts = [first_sampler_train_score, first_pca_train_score]
        test_dicts = [first_sampler_test_score, first_pca_test_score]

        return train_dicts, test_dicts

Esempio n. 4

0

Mostra file

    def run_demo(self, dts_name, dts_size):
        '''
        Parameters
        ----------
        dts_name : str
            Name of the dataset to test. Must be one of SUPPORTED_DATASETS

        Returns
        -------
        (train_scores, test_scores) : tuple of list of dict
            Dicts have keys: ['absi', 'ord', 'label'], and 'absi' are valid
            numbers, -1 is not valid
        '''
        info_run = """
- Dataset: **{0}**
- Size: **{1}**
        """
        self.run_specific = info_run.format(dts_name, dts_size)
        # self.title = dts_name
        models_name = ['dt', 'logit', 'linear_svc']
        train_dicts = []
        test_dicts = []
        for model_name in models_name:
            dataset = get_data(dts_name, n_ins=dts_size)
            clf = get_model(model_name=model_name,
                            sampler_name='identity',
                            pca_bool=False,
                            box_type='none')
            train_score, test_score = get_non_sampling_model_scores(
                clf, dataset)
            train_score = {
                'absi': [0, 10],
                'ord': [train_score, train_score],
                'label': model_name,
            }
            test_score = {
                'absi': [0, 10],
                'ord': [test_score, test_score],
                'label': model_name,
            }
            train_dicts.append(train_score)
            test_dicts.append(test_score)
        return train_dicts, test_dicts

Esempio n. 5

0

Mostra file

def fun(dts_name):
    """Fit a 50-tree random forest on a dataset and dump its scores to JSON.

    'mnist' and 'fashion_mnist' use their dedicated loaders; any other name
    goes through get_data with prop_train=2/3 and n_ins=5000. The fit time,
    train score and test score are printed and written, as a one-element
    list, to experimental_results/random_forest/<dts_name>.json.
    """
    output_path = f'experimental_results/random_forest/{dts_name}.json'

    if dts_name == 'mnist':
        data = get_mnist_data()
    elif dts_name == 'fashion_mnist':
        data = get_fashion_data()
    else:
        data = get_data(dataset_name=dts_name, prop_train=2 / 3, n_ins=5000)

    forest = RandomForestClassifier(n_estimators=50)

    data_train, data_test, target_train, target_test = (
        data[key]
        for key in ('data_train', 'data_test', 'target_train', 'target_test')
    )

    # Only the call to fit is timed.
    print('Empieza el experimento')
    started = time.perf_counter()
    forest.fit(data_train, target_train)
    elapsed = time.perf_counter() - started
    print('Termina el experimento')

    train_score = forest.score(data_train, target_train)
    test_score = forest.score(data_test, target_test)

    info = {
        "box_name": "none",
        "description": f"A normal RF ({dts_name})",
        "gamma": None,
        "label": "RF ",
        "model_name": "rf",
        "model_param": {},
        "test_score": test_score,
        "time": elapsed,
        "train_score": train_score,
    }
    print(info)

    with open(output_path, 'w') as out_file:
        json.dump([info], out_file, indent=4, sort_keys=True)

Esempio n. 6

0

Mostra file

    def run_demo_non_sampling(self, model):
        '''
        Score one non-sampling model on every dataset the demo knows.

        run_demo will call this method or the sampling one depending on the
        type of model.

        Parameters
        ----------
        model : abstract model
            Something on which you can call fit and score

        Returns
        -------
        (train_scores, test_scores) : tuple of list of dict
            dict with keys ['absi', 'ord', 'label']. One dict per name in
            self.all_datasets_names; the score is repeated at both ends of
            'absi' ([0, 1]).
        '''
        train_dicts, test_dicts = [], []

        for name in self.all_datasets_names:
            train_value, test_value = get_non_sampling_model_scores(
                model, get_data(name))

            train_dicts.append({
                'absi': [0, 1],
                'ord': [train_value, train_value],
                'label': name,
            })
            test_dicts.append({
                'absi': [0, 1],
                'ord': [test_value, test_value],
                'label': name,
            })

        return train_dicts, test_dicts

Esempio n. 7

0

Mostra file

File: demo4.py Progetto: ribes96/TFG

    def run_demo_non_sampling(self, dts_name, model_data):
        '''
        Score one non-sampling model per value in self.valores_C on one
        dataset.

        run_demo will call this method or the sampling one depending on the
        type of model.

        Parameters
        ----------
        dts_name : str
            Dataset name, passed to get_data together with self.n_ins.
        model_data : dict
            Keyword arguments forwarded to get_model next to C.
            Required keys: ['model_name', 'sampler_name', 'pca_bool',
            'n_estim', 'box_type']

        Returns
        -------
        (train_scores, test_scores) : tuple of list of dict
            dict with keys ['absi', 'ord', 'label']. One dict per value of
            C; the score is repeated at both ends of 'absi' ([0, 1]) and
            the label has the form 'C = <value>'.
        '''
        # The dataset is loaded once and shared by every value of C.
        dataset = get_data(dts_name, n_ins=self.n_ins)
        train_dicts, test_dicts = [], []

        for c_value in self.valores_C:
            train_value, test_value = get_non_sampling_model_scores(
                get_model(C=c_value, **model_data), dataset)

            train_dicts.append({
                'absi': [0, 1],
                'ord': [train_value, train_value],
                'label': 'C = {}'.format(c_value),
            })
            test_dicts.append({
                'absi': [0, 1],
                'ord': [test_value, test_value],
                'label': 'C = {}'.format(c_value),
            })

        return train_dicts, test_dicts

Esempio n. 8

0

Mostra file

    def run_demo_with_sampling(self, dts_name, dts_size, model_data, hparams, features):
        '''
        Gets the score of many models, all equal (specified in model_data)
        except for the gamma value of the sampler, which uses self.gammas for
        each model. A last entry, labelled 'No sampler', scores the same
        model with the 'identity' sampler.

        Parameters
        ----------
        dts_name : str
            Dataset name, passed to get_data.
        dts_size : int
            Passed to get_data as n_ins.
        model_data : dict
            Data needed to generate a model.
            Required keys: ['model_name', 'sampler_name', 'pca_bool',
            'n_estim', 'box_type']
            It is not modified; the 'identity' variant works on a copy.
        hparams : dict
            Passed to self.get_hparams together with the model name.
        features : list of int
            A list with real features to test with. Values of -1 are not
            allowed. Its first and last items delimit the 'No sampler'
            line.

        Returns
        -------
        None
            The results are stored in self.train_scores and
            self.test_scores as lists of dict with keys
            ['absi', 'ord', 'label'].
        '''
        dataset = get_data(dts_name, n_ins=dts_size)
        model_params = self.get_hparams(model_data['model_name'], hparams)

        train_dicts, test_dicts = [], []
        for gamma in self.gammas:
            # Both samplers receive the same gamma.
            model = get_model(rbfsampler_gamma=gamma, nystroem_gamma=gamma,
                              model_params=model_params, **model_data)
            # The third returned value (errors) is not used here.
            train_score, test_score, errors = get_sampling_model_scores(
                model, dataset, features)

            label = 'gamma {}'.format(gamma)
            train_score['label'] = label
            test_score['label'] = label

            train_dicts.append(train_score)
            test_dicts.append(test_score)

        # Also run the case where no sampler is used. `gamma` still holds
        # the last value of the loop above, so an empty self.gammas makes
        # this raise NameError.
        identity_data = dict(model_data, sampler_name='identity')
        model = get_model(rbfsampler_gamma=gamma, nystroem_gamma=gamma,
                          model_params=model_params, **identity_data)
        plain_train, plain_test = get_non_sampling_model_scores(
            model, dataset)

        # A single score drawn as a flat line across the feature range.
        train_dicts.append({
            'absi': [features[0], features[-1]],
            'ord': [plain_train, plain_train],
            'label': 'No sampler'
        })
        test_dicts.append({
            'absi': [features[0], features[-1]],
            'ord': [plain_test, plain_test],
            'label': 'No sampler'
        })

        self.train_scores = train_dicts
        self.test_scores = test_dicts

Esempio n. 9

0

Mostra file

    def run_demo(self, dts_name, dts_size, features_range, models):
        '''
        Just reading from the arguments, returns a pair of list of dictionarys,
        with the scores of the demo. Pair is (train, test)

        Parameters
        ----------
        dts_name : str
        dts_size : int
        features_range : list of int
            shape: [2], increasing order
        models : list of dict
            each dict is a model. Required keys: ['model_name', 'sampler',
            'box_type', 'n_estim', 'pca']
            Values of 'sampler' and 'box_type' are str or None

        Returns
        -------
        (train_scores, test_scores) : tuple of list of dict
            Dict with keys ['absi', 'ord', 'label']
        '''
        info_run = '''
- Dataset: **{0}**
- Size: **{1}**
        '''
        self.run_specific = info_run.format(dts_name, dts_size)
        dataset = get_data(dts_name, n_ins=dts_size)
        train_scores = []
        test_scores = []

        for m in models:
            model_name = m['model_name']
            sampler_name = m['sampler_name']
            box_type = m['box_type']
            n_estim = m['n_estim']
            pca = m['pca']
            if box_type == 'none':
                n_estim = None
            clf = get_model(model_name=model_name,
                            sampler_name=sampler_name,
                            pca_bool=pca,
                            n_estim=n_estim,
                            box_type=box_type)
            n_splits_features = 30
            features = list(range(*features_range))
            if (features_range[1] - features_range[0]) > n_splits_features:
                features = np.linspace(*features_range,
                                       num=n_splits_features,
                                       dtype=np.int).tolist()

            if sampler_name == 'identity':
                features = None

            if sampler_name is 'identity':
                # train_score y test_score son floats
                train_score, test_score =\
                    get_non_sampling_model_scores(clf, dataset)
                lab = self.get_label(model_name, sampler_name, box_type,
                                     n_estim, pca)
                train_score = {
                    'absi': features_range,
                    'ord': [train_score, train_score],
                    'label': lab,
                }
                test_score = {
                    'absi': features_range,
                    'ord': [test_score, test_score],
                    'label': lab,
                }
            else:
                # train_score y test_score son diccionarios
                train_score, test_score =\
                    get_sampling_model_scores(clf, dataset, features)
                lab = self.get_label(model_name, sampler_name, box_type,
                                     n_estim, pca)
                train_score['label'] = lab
                test_score['label'] = lab

            train_scores.append(train_score)
            test_scores.append(test_score)

        return train_scores, test_scores

Esempio n. 10

0

Mostra file

    def run_demo(self, dts_name, dts_size, features_range, models, hparams,
                 rbfsampler_gamma, nystroem_gamma):
        '''
        First it clears self.train_scores and self.test_scores, and then
        runs the demo, appending to those the results of each of the models.

        The results are in the shape of a dictionary, with keys ['absi', 'ord',
        'label']

        Errors reported by get_sampling_model_scores are appended to
        self.ERRORS, and the execution continues with the rest of the
        models.

        Parameters
        ----------
        dts_name : str
            Dataset name, passed to get_data.
        dts_size : int
            Passed to get_data as n_ins.
        features_range : list of int
            shape: [2], increasing order
        models : list of dict
            each dict is a model. Required keys: ['model_name',
            'sampler_name', 'box_type', 'n_estim', 'pca', 'pca_first']
            A 'sampler_name' of 'identity' means no sampler; a 'box_type'
            of 'none' makes 'n_estim' be ignored.
        hparams : dict
            Required keys: ['dt', 'logit', 'linear_svc']
        rbfsampler_gamma, nystroem_gamma
            Forwarded to get_model and shown in the run description.

        Returns
        -------
        None
        '''
        info_run = {
            'Dataset':
            dts_name,
            'Size':
            dts_size,
            'RFF gamma':
            rbfsampler_gamma,
            'Nystroem gamma':
            nystroem_gamma,
            'DT max. depth':
            hparams['dt']['max_depth'],
            'DT min. samples split':
            hparams['dt']['min_samples_split'],
            'DT min. samples leaf':
            hparams['dt']['min_samples_leaf'],
            'DT min. weight fraction leaf':
            hparams['dt']['min_weight_fraction_leaf'],
            'DT max. leaf nodes':
            hparams['dt']['max_leaf_nodes'],
            'DT min. impurity decrease':
            hparams['dt']['min_impurity_decrease'],
            'Logit C':
            hparams['logit']['C'],
            'Linear SVC':
            hparams['linear_svc']['C'],
        }
        self.run_specific = self.get_run_specific_widget(info_run)
        dataset = get_data(dts_name, n_ins=dts_size)

        # Cleared in place, so existing references to these lists see the
        # new results.
        self.train_scores.clear()
        self.test_scores.clear()

        # Maximum number of feature counts evaluated per sampling model.
        n_splits_features = 30

        for m in models:
            model_name = m['model_name']
            sampler_name = m['sampler_name']
            box_type = m['box_type']
            n_estim = m['n_estim']
            pca = m['pca']
            pca_first = m['pca_first']
            model_params = self.get_hparams(model_name, hparams)
            if box_type == 'none':
                n_estim = None
            clf = get_model(model_name=model_name,
                            model_params=model_params,
                            sampler_name=sampler_name,
                            pca_bool=pca,
                            pca_first=pca_first,
                            n_estim=n_estim,
                            box_type=box_type,
                            rbfsampler_gamma=rbfsampler_gamma,
                            nystroem_gamma=nystroem_gamma)

            if sampler_name == 'identity':
                # Without a sampler the scores are plain floats, drawn as
                # a flat line across the whole feature range.
                train_value, test_value =\
                    get_non_sampling_model_scores(clf, dataset)
                train_score = {
                    'absi': features_range,
                    'ord': [train_value, train_value],
                }
                test_score = {
                    'absi': features_range,
                    'ord': [test_value, test_value],
                }
            else:
                # Wide ranges are thinned to n_splits_features evenly
                # spaced integers. The builtin int replaces np.int, which
                # NumPy removed.
                if (features_range[1] - features_range[0]) > n_splits_features:
                    features = np.linspace(*features_range,
                                           num=n_splits_features,
                                           dtype=int).tolist()
                else:
                    features = list(range(*features_range))
                # With a sampler the scores already come as dictionaries.
                train_score, test_score, errors =\
                    get_sampling_model_scores(clf, dataset, features)
                self.ERRORS.extend(errors)

            lab = self.get_label(model_name, sampler_name, box_type,
                                 n_estim, pca, pca_first)
            train_score['label'] = lab
            test_score['label'] = lab

            self.train_scores.append(train_score)
            self.test_scores.append(test_score)

Esempio n. 11

0

Mostra file