Exemple #1
0
    def __init__(self,
                 data,
                 Y,
                 algorithm=None,
                 cv_dict=None,
                 mask=None,
                 output_dir='.',
                 **kwargs):
        """ Initialize Predict.

        Args:
            data: nibabel data instance, or a list of them (a list is
                concatenated with nib.concat_images)
            Y: vector of training labels; its length must equal the number
                of rows of the masked data
            algorithm: Algorithm to use for prediction.  Must be one of 'svm', 'svr',
                'linear', 'logistic', 'lasso', 'ridge', 'ridgeClassifier','randomforest',
                or 'randomforestClassifier'
            cv_dict: Type of cross_validation to use. A dictionary of
                {'type': 'kfolds', 'n_folds': n},
                {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or
                {'type': 'loso', 'subject_id': holdout},
                where n = number of folds, and subject = vector of subject ids that corresponds to self.Y
            mask: binary nibabel mask. When omitted, the file
                'MNI152_T1_2mm_brain_mask.nii.gz' under get_resource_path()
                is loaded and used instead.
            output_dir: Directory to use for writing all outputs
            **kwargs: Additional keyword arguments to pass to the prediction algorithm

        Raises:
            ValueError: if mask or data is not a nibabel image, or if the
                length of Y does not match the masked data.

        """

        self.output_dir = output_dir

        if mask is None:
            mask = nib.load(
                os.path.join(get_resource_path(),
                             'MNI152_T1_2mm_brain_mask.nii.gz'))
        elif not isinstance(mask, nib.nifti1.Nifti1Image):
            raise ValueError("mask is not a nibabel instance")
        self.mask = mask

        if isinstance(data, list):
            data = nib.concat_images(data)

        if not isinstance(data,
                          (nib.nifti1.Nifti1Image, nib.nifti1.Nifti1Pair)):
            raise ValueError("data is not a nibabel instance")

        # Build the masker from self.mask (not the raw argument) so that the
        # default mask loaded above is actually applied when mask is omitted.
        self.nifti_masker = NiftiMasker(mask_img=self.mask)
        self.data = self.nifti_masker.fit_transform(data)

        if isinstance(Y, list):
            Y = np.array(Y)
        if self.data.shape[0] != len(Y):
            raise ValueError("Y does not match the correct size of data")
        self.Y = Y

        # algorithm and cv are optional here; self.algorithm / self.cv are
        # only set when the corresponding argument is given.
        if algorithm is not None:
            self.set_algorithm(algorithm, **kwargs)

        if cv_dict is not None:
            self.cv = set_cv(cv_dict)
Exemple #2
0
    def __init__(self, data, Y, algorithm=None, cv_dict=None, mask=None,
                 output_dir='.', **kwargs):
        """ Initialize Predict.

        Args:
            data: nibabel data instance, or a list of them (a list is
                concatenated with nib.concat_images)
            Y: vector of training labels; its length must equal the number
                of rows of the masked data
            algorithm: Algorithm to use for prediction.  Must be one of 'svm', 'svr',
                'linear', 'logistic', 'lasso', 'ridge', 'ridgeClassifier','randomforest',
                or 'randomforestClassifier'
            cv_dict: Type of cross_validation to use. A dictionary of
                {'type': 'kfolds', 'n_folds': n},
                {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or
                {'type': 'loso', 'subject_id': holdout},
                where n = number of folds, and subject = vector of subject ids that corresponds to self.Y
            mask: binary nibabel mask. When omitted, the file
                'MNI152_T1_2mm_brain_mask.nii.gz' under get_resource_path()
                is loaded and used instead.
            output_dir: Directory to use for writing all outputs
            **kwargs: Additional keyword arguments to pass to the prediction algorithm

        Raises:
            ValueError: if mask or data is not a nibabel image, or if the
                length of Y does not match the masked data.

        """

        self.output_dir = output_dir

        if mask is not None:
            if not isinstance(mask, nib.nifti1.Nifti1Image):
                raise ValueError("mask is not a nibabel instance")
            self.mask = mask
        else:
            default_mask_path = os.path.join(get_resource_path(),
                                             'MNI152_T1_2mm_brain_mask.nii.gz')
            self.mask = nib.load(default_mask_path)

        if isinstance(data, list):
            data = nib.concat_images(data)

        if not isinstance(data, (nib.nifti1.Nifti1Image, nib.nifti1.Nifti1Pair)):
            raise ValueError("data is not a nibabel instance")

        # Pass self.mask rather than the raw `mask` argument: when the caller
        # omits the mask, the argument is None and the default mask loaded
        # above would otherwise never reach the masker.
        self.nifti_masker = NiftiMasker(mask_img=self.mask)
        self.data = self.nifti_masker.fit_transform(data)

        if isinstance(Y, list):
            Y = np.array(Y)
        if self.data.shape[0] != len(Y):
            raise ValueError("Y does not match the correct size of data")
        self.Y = Y

        # self.algorithm / self.cv are only set when the corresponding
        # argument is supplied.
        if algorithm is not None:
            self.set_algorithm(algorithm, **kwargs)

        if cv_dict is not None:
            self.cv = set_cv(cv_dict)
Exemple #3
0
    def predict(self, algorithm=None, cv_dict=None, plot=True, **kwargs):

        """ Run prediction

        Args:
            algorithm: Algorithm to use for prediction.  Must be one of 'svm', 'svr',
            'linear', 'logistic', 'lasso', 'ridge', 'ridgeClassifier','randomforest',
            or 'randomforestClassifier'. When omitted, 'svr' with a linear
            kernel is used (and **kwargs are not forwarded).
            cv_dict: Type of cross_validation to use. A dictionary of
                {'type': 'kfolds', 'n_folds': n},
                {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or
                {'type': 'loso', 'subject_id': holdout},
                where n = number of folds, and subject = vector of subject ids that corresponds to self.Y
            plot: Boolean indicating whether or not to create plots.
            **kwargs: Additional keyword arguments to pass to the prediction algorithm

        Returns:
            output: a dictionary of prediction parameters. Always holds 'Y',
                'yfit_all', 'intercept' and 'weight_map'. Classification adds
                'prob_all' and/or 'dist_from_hyperplane_all' plus 'mcr_all'
                (the fraction of fitted labels equal to Y); regression adds
                'rmse_all' and 'r_all'. When cv_dict is given, 'cv',
                'yfit_xval', 'intercept_xval', 'weight_map_xval' and the
                matching '*_xval' statistics are added; 'roc' is added when
                plotting a cross-validated classification.

        """

        # Set algorithm
        if algorithm is not None:
            predictor_settings = set_algorithm(algorithm, **kwargs)
        else:
            # Use SVR as a default
            predictor_settings = set_algorithm('svr', **{'kernel': "linear"})

        algo = predictor_settings['algorithm']
        prediction_type = predictor_settings['prediction_type']
        predictor = predictor_settings['predictor']

        is_classification = prediction_type == 'classification'
        # These classifiers report a distance from the hyperplane; every other
        # classifier reports probabilities. An svm reports both when it was
        # built with probability enabled.
        uses_distance = is_classification and algo in [
            'svm', 'ridgeClassifier', 'ridgeClassifierCV']
        uses_proba = is_classification and (
            not uses_distance or (algo == 'svm' and predictor.probability))

        def _current_weights(fitted):
            # Voxel weights of the most recent fit. The PCA-based pipelines
            # project their regression coefficients back through the PCA
            # components stored in predictor_settings.
            if algo == 'lassopcr':
                return np.dot(predictor_settings['_pca'].components_.T,
                              predictor_settings['_lasso'].coef_)
            if algo == 'pcr':
                return np.dot(predictor_settings['_pca'].components_.T,
                              predictor_settings['_regress'].coef_)
            return fitted.coef_.squeeze()

        # Initialize output dictionary
        output = {}
        output['Y'] = np.array(self.Y).flatten()

        # Overall Fit for weight map
        predictor.fit(self.data, output['Y'])
        output['yfit_all'] = predictor.predict(self.data)
        if uses_distance:
            output['dist_from_hyperplane_all'] = predictor.decision_function(self.data)
        if uses_proba:
            output['prob_all'] = predictor.predict_proba(self.data)[:, 1]

        output['intercept'] = predictor.intercept_

        # Weight map
        output['weight_map'] = self.empty()
        output['weight_map'].data = _current_weights(predictor)

        # Cross-Validation Fit
        if cv_dict is not None:
            output['cv'] = set_cv(cv_dict)

            # Same estimator object as `predictor`: each fold refits it. The
            # overall-fit outputs above were already extracted.
            predictor_cv = predictor_settings['predictor']
            output['yfit_xval'] = output['yfit_all'].copy()
            output['intercept_xval'] = []
            output['weight_map_xval'] = deepcopy(output['weight_map'])
            wt_map_xval = []
            if uses_proba:
                output['prob_xval'] = np.zeros(len(self.Y))
            if uses_distance:
                output['dist_from_hyperplane_xval'] = np.zeros(len(self.Y))

            for train, test in output['cv']:
                # Index the flattened label array positionally, consistent
                # with self.data[train]; label-based `.loc` only works for
                # pandas objects whose index happens to equal the positions.
                predictor_cv.fit(self.data[train], output['Y'][train])
                output['yfit_xval'][test] = predictor_cv.predict(self.data[test])
                if uses_distance:
                    output['dist_from_hyperplane_xval'][test] = predictor_cv.decision_function(self.data[test])
                if uses_proba:
                    output['prob_xval'][test] = predictor_cv.predict_proba(self.data[test])[:, 1]
                output['intercept_xval'].append(predictor_cv.intercept_)

                # Weight map
                wt_map_xval.append(_current_weights(predictor_cv))

            # Stack the per-fold maps once, after the last fold.
            if wt_map_xval:
                output['weight_map_xval'].data = np.array(wt_map_xval)

        # Print Results (single-argument print() works on Python 2 and 3)
        if is_classification:
            output['mcr_all'] = np.mean(output['yfit_all'] == output['Y'])
            print('overall accuracy: %.2f' % output['mcr_all'])
            if cv_dict is not None:
                output['mcr_xval'] = np.mean(output['yfit_xval'] == output['Y'])
                print('overall CV accuracy: %.2f' % output['mcr_xval'])
        elif prediction_type == 'prediction':
            output['rmse_all'] = np.sqrt(np.mean((output['yfit_all'] - output['Y'])**2))
            output['r_all'] = np.corrcoef(output['Y'], output['yfit_all'])[0, 1]
            print('overall Root Mean Squared Error: %.2f' % output['rmse_all'])
            print('overall Correlation: %.2f' % output['r_all'])
            if cv_dict is not None:
                output['rmse_xval'] = np.sqrt(np.mean((output['yfit_xval'] - output['Y'])**2))
                output['r_xval'] = np.corrcoef(output['Y'], output['yfit_xval'])[0, 1]
                print('overall CV Root Mean Squared Error: %.2f' % output['rmse_xval'])
                print('overall CV Correlation: %.2f' % output['r_xval'])

        # Plot
        if plot:
            if cv_dict is not None:
                if prediction_type == 'prediction':
                    scatterplot(pd.DataFrame({'Y': output['Y'], 'yfit_xval': output['yfit_xval']}))
                elif is_classification:
                    # Probabilities take precedence over hyperplane distances
                    # whenever they were computed.
                    if uses_proba:
                        roc_input = output['prob_xval']
                    else:
                        roc_input = output['dist_from_hyperplane_xval']
                    output['roc'] = Roc(input_values=roc_input, binary_outcome=output['Y'].astype('bool'))
                    output['roc'].plot()
            output['weight_map'].plot()

        return output
Exemple #4
0
    def predict(self,
                algorithm=None,
                cv_dict=None,
                save_images=True,
                save_output=True,
                save_plot=True,
                **kwargs):
        """ Run prediction

        Fits self.predictor on all of self.data / self.Y, optionally refits it
        on each cross-validation fold, stores the fitted values and summary
        statistics on self, and prints the summary.

        Args:
            algorithm: Algorithm to use for prediction.  Must be one of 'svm', 'svr',
            'linear', 'logistic', 'lasso', 'ridge', 'ridgeClassifier','randomforest',
            or 'randomforestClassifier'. When given, it replaces any algorithm
            that was set earlier.
            cv_dict: Type of cross_validation to use. A dictionary of
                {'type': 'kfolds', 'n_folds': n},
                {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or
                {'type': 'loso', 'subject_id': holdout},
                where n = number of folds, and subject = vector of subject ids that corresponds to self.Y
            save_images: Boolean indicating whether or not to save images to file.
            save_output: Boolean indicating whether or not to save prediction output to file.
            save_plot: Boolean indicating whether or not to create plots.
            **kwargs: Additional keyword arguments to pass to the prediction algorithm

        Raises:
            ValueError: if no algorithm is passed and none was set before.

        """

        # An explicitly passed algorithm always wins; previously it was
        # silently ignored whenever self.algorithm already existed.
        if algorithm is not None:
            self.set_algorithm(algorithm, **kwargs)
        elif not hasattr(self, 'algorithm'):
            raise ValueError(
                "Make sure you specify an 'algorithm' to use.")

        is_classification = self.prediction_type == 'classification'
        # These classifiers expose decision_function; all other classifiers
        # expose predict_proba. An svm exposes both when built with
        # probability enabled.
        uses_distance = is_classification and self.algorithm in [
            'svm', 'ridgeClassifier', 'ridgeClassifierCV'
        ]
        uses_proba = is_classification and (
            not uses_distance
            or (self.algorithm == 'svm' and self.predictor.probability))

        # Overall Fit for weight map
        predictor = self.predictor
        predictor.fit(self.data, self.Y)
        self.yfit_all = predictor.predict(self.data)
        if uses_proba:
            # Stored exactly as predict_proba returns it.
            self.prob_all = predictor.predict_proba(self.data)

        # Cross-Validation Fit
        if cv_dict is not None:
            self.cv = set_cv(cv_dict)

        dist_from_hyperplane_xval = None

        if hasattr(self, 'cv'):
            # NOTE(review): predictor_cv is the same object as `predictor`,
            # so after the loop both hold the last fold's fit and that is
            # what _save_image receives below - confirm this is intended.
            predictor_cv = self.predictor
            self.yfit_xval = self.yfit_all.copy()
            if uses_proba:
                self.prob_xval = np.zeros(len(self.Y))
            if uses_distance:
                dist_from_hyperplane_xval = np.zeros(len(self.Y))

            for train, test in self.cv:
                predictor_cv.fit(self.data[train], self.Y[train])
                self.yfit_xval[test] = predictor_cv.predict(self.data[test])
                if uses_distance:
                    dist_from_hyperplane_xval[
                        test] = predictor_cv.decision_function(
                            self.data[test])
                if uses_proba:
                    # prob_xval holds one value per sample, so keep a single
                    # column (index 1) of the predict_proba output; assigning
                    # the whole 2-D result into the 1-D slice raises.
                    self.prob_xval[test] = predictor_cv.predict_proba(
                        self.data[test])[:, 1]

        # Save Outputs
        if save_images:
            self._save_image(predictor)

        if save_output:
            self._save_stats_output(dist_from_hyperplane_xval)

        if save_plot:
            if hasattr(self, 'cv'):
                self._save_plot(predictor_cv)
            else:
                self._save_plot(predictor)

        # Print Results (single-argument print() works on Python 2 and 3)
        if is_classification:
            self.mcr_all = np.mean(self.yfit_all == self.Y)
            print('overall accuracy: %.2f' % self.mcr_all)
            if hasattr(self, 'cv'):
                self.mcr_xval = np.mean(self.yfit_xval == self.Y)
                print('overall CV accuracy: %.2f' % self.mcr_xval)
        elif self.prediction_type == 'prediction':
            self.rmse_all = np.sqrt(np.mean((self.yfit_all - self.Y)**2))
            self.r_all = np.corrcoef(self.Y, self.yfit_all)[0, 1]
            print('overall Root Mean Squared Error: %.2f' % self.rmse_all)
            print('overall Correlation: %.2f' % self.r_all)
            if hasattr(self, 'cv'):
                self.rmse_xval = np.sqrt(np.mean((self.yfit_xval - self.Y)**2))
                self.r_xval = np.corrcoef(self.Y, self.yfit_xval)[0, 1]
                print('overall CV Root Mean Squared Error: %.2f' % self.rmse_xval)
                print('overall CV Correlation: %.2f' % self.r_xval)
Exemple #5
0
    def predict(self, algorithm=None, cv_dict=None, save_images=True, save_output=True,
                save_plot=True, **kwargs):

        """ Run prediction

        Fits self.predictor on all of self.data / self.Y, optionally refits it
        on each cross-validation fold, stores the fitted values and summary
        statistics on self, and prints the summary.

        Args:
            algorithm: Algorithm to use for prediction.  Must be one of 'svm', 'svr',
            'linear', 'logistic', 'lasso', 'ridge', 'ridgeClassifier','randomforest',
            or 'randomforestClassifier'. When given, it replaces any algorithm
            that was set earlier.
            cv_dict: Type of cross_validation to use. A dictionary of
                {'type': 'kfolds', 'n_folds': n},
                {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or
                {'type': 'loso', 'subject_id': holdout},
                where n = number of folds, and subject = vector of subject ids that corresponds to self.Y
            save_images: Boolean indicating whether or not to save images to file.
            save_output: Boolean indicating whether or not to save prediction output to file.
            save_plot: Boolean indicating whether or not to create plots.
            **kwargs: Additional keyword arguments to pass to the prediction algorithm

        Raises:
            ValueError: if no algorithm is passed and none was set before.

        """

        # Honour an explicitly passed algorithm even when one is already set;
        # the old hasattr-first check silently dropped it.
        if algorithm is not None:
            self.set_algorithm(algorithm, **kwargs)
        elif not hasattr(self, 'algorithm'):
            raise ValueError("Make sure you specify an 'algorithm' to use.")

        hyperplane_algorithms = ['svm', 'ridgeClassifier', 'ridgeClassifierCV']
        classifying = self.prediction_type == 'classification'
        # Hyperplane classifiers expose decision_function; the remaining
        # classifiers expose predict_proba. An svm built with probability
        # enabled exposes both.
        wants_distance = classifying and self.algorithm in hyperplane_algorithms
        wants_proba = classifying and (
            not wants_distance
            or (self.algorithm == 'svm' and self.predictor.probability))

        # Overall Fit for weight map
        predictor = self.predictor
        predictor.fit(self.data, self.Y)
        self.yfit_all = predictor.predict(self.data)
        if wants_proba:
            # Stored exactly as predict_proba returns it.
            self.prob_all = predictor.predict_proba(self.data)

        # Cross-Validation Fit
        if cv_dict is not None:
            self.cv = set_cv(cv_dict)

        dist_from_hyperplane_xval = None
        has_cv = hasattr(self, 'cv')

        if has_cv:
            # NOTE(review): predictor_cv aliases `predictor`, so once the
            # folds have run both names refer to the last fold's fit, which
            # is what _save_image is given below - confirm this is intended.
            predictor_cv = self.predictor
            n_samples = len(self.Y)
            self.yfit_xval = self.yfit_all.copy()
            if wants_proba:
                self.prob_xval = np.zeros(n_samples)
            if wants_distance:
                dist_from_hyperplane_xval = np.zeros(n_samples)

            for train, test in self.cv:
                predictor_cv.fit(self.data[train], self.Y[train])
                held_out = self.data[test]
                self.yfit_xval[test] = predictor_cv.predict(held_out)
                if wants_distance:
                    dist_from_hyperplane_xval[test] = predictor_cv.decision_function(held_out)
                if wants_proba:
                    # prob_xval is 1-D (one value per sample): keep column 1
                    # of the predict_proba output. Assigning the full 2-D
                    # result into the 1-D slice raises a shape error.
                    self.prob_xval[test] = predictor_cv.predict_proba(held_out)[:, 1]

        # Save Outputs
        if save_images:
            self._save_image(predictor)

        if save_output:
            self._save_stats_output(dist_from_hyperplane_xval)

        if save_plot:
            self._save_plot(predictor_cv if has_cv else predictor)

        # Print Results (single-argument print() works on Python 2 and 3)
        if classifying:
            self.mcr_all = np.mean(self.yfit_all == self.Y)
            print('overall accuracy: %.2f' % self.mcr_all)
            if has_cv:
                self.mcr_xval = np.mean(self.yfit_xval == self.Y)
                print('overall CV accuracy: %.2f' % self.mcr_xval)
        elif self.prediction_type == 'prediction':
            self.rmse_all = np.sqrt(np.mean((self.yfit_all - self.Y)**2))
            self.r_all = np.corrcoef(self.Y, self.yfit_all)[0, 1]
            print('overall Root Mean Squared Error: %.2f' % self.rmse_all)
            print('overall Correlation: %.2f' % self.r_all)
            if has_cv:
                self.rmse_xval = np.sqrt(np.mean((self.yfit_xval - self.Y)**2))
                self.r_xval = np.corrcoef(self.Y, self.yfit_xval)[0, 1]
                print('overall CV Root Mean Squared Error: %.2f' % self.rmse_xval)
                print('overall CV Correlation: %.2f' % self.r_xval)