Beispiel #1
0
    def __init__(self,
                 data=None,
                 id=None,
                 time=None,
                 name='panel data model',
                 formula=None,
                 var_transform=False,
                 model='Fixed',
                 *args,
                 **kwargs):
        """Prepare a panel-data model that is evaluated through R's ``plm``.

        Args:
            data: Table whose ``.columns`` can be read and reassigned
                (presumably a pandas DataFrame -- confirm against callers).
                A deep copy is made, so the caller's object is never renamed.
            id: Name of the column holding the individual identifier.
            time: Name of the column holding the time identifier.
            name: Model name forwarded to the base-class initialiser.
            formula: R model formula given as a string.
            var_transform: When true, every column is renamed to ``var_<i>``
                and the formula, ``id`` and ``time`` are rewritten to match.
            model: Estimator name.  ``plm`` names are passed through; the
                aliases ``'Fixed'``/``'fe'``, ``'re'`` and ``'pooled'`` are
                translated (case-insensitively) to ``plm``'s own names.
            *args: Forwarded to the base-class initialiser.
            **kwargs: Forwarded to the base-class initialiser.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Bridge to the R session.
        self._renv = REnv()
        # Working formula, plus an untouched copy used for reporting.
        self._formula = formula
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        # Optionally replace every column name with an R-safe placeholder
        # (useful when the original names are, e.g., Chinese).
        self._var_transform = var_transform
        index_id, index_time = id, time
        if self._var_transform:
            original_names = list(self._copy_data.columns)
            self._generated_variables = [
                'var_{}'.format(position)
                for position in range(len(original_names))
            ]
            self._variables_mapping = list(
                zip(original_names, self._generated_variables))
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = dict(
                zip(self._generated_variables, original_names))
            self._copy_data.columns = self._generated_variables

            # Rewrite the formula in ONE pass.  Substituting name by name
            # re-scans text that already contains placeholders, so a column
            # called 'a' or 'var' would corrupt an earlier 'var_0'.  Names
            # are escaped (they are literals, not regexes), tried longest
            # first, and only matched as whole identifiers.
            lookup = {str(old): new for old, new in self._variables_mapping}
            candidates = sorted((key for key in lookup if key),
                                key=len, reverse=True)
            if self._formula is not None and candidates:
                pattern = re.compile(
                    r'(?<![\w.])(?:{})(?![\w.])'.format(
                        '|'.join(re.escape(key) for key in candidates)))
                self._formula = pattern.sub(
                    lambda found: lookup[found.group(0)], self._formula)

            # The index columns were renamed together with everything else,
            # so pdata.frame() must be given their placeholder names.
            index_id = self._variables_mapping_dict.get(id, id)
            index_time = self._variables_mapping_dict.get(time, time)

        # plm() only understands its own estimator names; translate common
        # aliases (including the default 'Fixed') and leave the rest alone.
        model_names = {
            'fixed': 'within',
            'fe': 'within',
            're': 'random',
            'pooled': 'pooling',
        }
        model_names.update(
            (known, known)
            for known in ('within', 'random', 'ht', 'between', 'pooling',
                          'fd'))
        self._model = model_names.get(str(model).lower(), model)

        # Load the plm library in R.
        self._renv._R.r('library("plm")')
        # Hand the (possibly renamed) data to R.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        # Convert the data to plm's panel format.
        fmt_str = 'pdata <- pdata.frame(mdata, index=c("{0}","{1}"))'
        self._renv._R.r(fmt_str.format(index_id, index_time))

        self._summary = importr('base').summary
Beispiel #2
0
    def __init__(self,
                 data=None,
                 name='Propensity Score Matching',
                 formula=None,
                 method='nearest',
                 distance='logit',
                 var_transform=False,
                 *args,
                 **kwargs):
        """Prepare a propensity-score matching run backed by R's ``MatchIt``.

        Args:
            data: Table whose ``.columns`` can be read and reassigned
                (presumably a pandas DataFrame -- confirm against callers).
                A deep copy is made, so the caller's object is never renamed.
            name: Model name forwarded to the base-class initialiser.
            formula: R model formula given as a string.
            method: Stored as the matching method.
            distance: Stored as the distance measure.
            var_transform: When true, every column is renamed to ``var_<i>``
                and the formula is rewritten to use the new names.
            *args: Forwarded to the base-class initialiser.
            **kwargs: Forwarded to the base-class initialiser.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Bridge to the R session.
        self._renv = REnv()
        # Matching configuration.
        self._formula = formula
        self._method = method
        self._distance = distance
        # Untouched copy of the formula, used for reporting.
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        # Optionally replace every column name with an R-safe placeholder
        # (useful when the original names are, e.g., Chinese).
        self._var_transform = var_transform
        if self._var_transform:
            original_names = list(self._copy_data.columns)
            self._generated_variables = [
                'var_{}'.format(position)
                for position in range(len(original_names))
            ]
            self._variables_mapping = list(
                zip(original_names, self._generated_variables))
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = dict(
                zip(self._generated_variables, original_names))
            self._copy_data.columns = self._generated_variables

            # Rewrite the formula in ONE pass.  Substituting name by name
            # re-scans text that already contains placeholders, so a column
            # called 'a' or 'var' would corrupt an earlier 'var_0'.  Names
            # are escaped (they are literals, not regexes), tried longest
            # first, and only matched as whole identifiers.
            lookup = {str(old): new for old, new in self._variables_mapping}
            candidates = sorted((key for key in lookup if key),
                                key=len, reverse=True)
            if self._formula is not None and candidates:
                pattern = re.compile(
                    r'(?<![\w.])(?:{})(?![\w.])'.format(
                        '|'.join(re.escape(key) for key in candidates)))
                self._formula = pattern.sub(
                    lambda found: lookup[found.group(0)], self._formula)

        # Load the MatchIt library in R.
        self._renv._R.r('library("MatchIt")')
        # Hand the (possibly renamed) data to R.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        self._summary = importr('base').summary
Beispiel #3
0
    def __init__(self, data=None, name='Propensity Score Matching', formula=None, method='nearest',
                 distance = 'logit', var_transform=False, *args, **kwargs):
        """Prepare a propensity-score matching run backed by R's ``MatchIt``.

        Args:
            data: Table whose ``.columns`` can be read and reassigned
                (presumably a pandas DataFrame -- confirm against callers).
                A deep copy is made, so the caller's object is never renamed.
            name: Model name forwarded to the base-class initialiser.
            formula: R model formula given as a string.
            method: Stored as the matching method.
            distance: Stored as the distance measure.
            var_transform: When true, every column is renamed to ``var_<i>``
                and the formula is rewritten to use the new names.
            *args: Forwarded to the base-class initialiser.
            **kwargs: Forwarded to the base-class initialiser.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Bridge to the R session.
        self._renv = REnv()
        # Matching configuration.
        self._formula = formula
        self._method = method
        self._distance = distance
        # Untouched copy of the formula, used for reporting.
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        # Optionally replace every column name with an R-safe placeholder
        # (useful when the original names are, e.g., Chinese).
        self._var_transform = var_transform
        if self._var_transform:
            original_names = list(self._copy_data.columns)
            self._generated_variables = [
                'var_{}'.format(position)
                for position in range(len(original_names))
            ]
            self._variables_mapping = list(
                zip(original_names, self._generated_variables))
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = dict(
                zip(self._generated_variables, original_names))
            self._copy_data.columns = self._generated_variables

            # Rewrite the formula in ONE pass.  Substituting name by name
            # re-scans text that already contains placeholders, so a column
            # called 'a' or 'var' would corrupt an earlier 'var_0'.  Names
            # are escaped (they are literals, not regexes), tried longest
            # first, and only matched as whole identifiers.
            lookup = {str(old): new for old, new in self._variables_mapping}
            candidates = sorted((key for key in lookup if key),
                                key=len, reverse=True)
            if self._formula is not None and candidates:
                pattern = re.compile(
                    r'(?<![\w.])(?:{})(?![\w.])'.format(
                        '|'.join(re.escape(key) for key in candidates)))
                self._formula = pattern.sub(
                    lambda found: lookup[found.group(0)], self._formula)

        # Load the MatchIt library in R.
        self._renv._R.r('library("MatchIt")')
        # Hand the (possibly renamed) data to R.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        self._summary = importr('base').summary
Beispiel #4
0
    def __init__(self, data=None, name='regression', formula=None, var_transform=False, family='binomial(link=logit)', *args, **kwargs):
        """Prepare a generalised linear model whose data is pushed into R.

        Args:
            data: Table whose ``.columns`` can be read and reassigned
                (presumably a pandas DataFrame -- confirm against callers).
                A deep copy is made, so the caller's object is never renamed.
            name: Model name forwarded to the base-class initialiser.
            formula: R model formula given as a string.
            var_transform: When true, every column is renamed to ``var_<i>``
                and the formula is rewritten to use the new names.
            family: Stored as the model family expression.
            *args: Forwarded to the base-class initialiser.
            **kwargs: Forwarded to the base-class initialiser.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Bridge to the R session.
        self._renv = REnv()
        # Working formula, plus an untouched copy used for reporting.
        self._formula = formula
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        self._family = family

        # Optionally replace every column name with an R-safe placeholder
        # (useful when the original names are, e.g., Chinese).
        self._var_transform = var_transform
        if self._var_transform:
            original_names = list(self._copy_data.columns)
            self._generated_variables = [
                'var_{}'.format(position)
                for position in range(len(original_names))
            ]
            self._variables_mapping = list(
                zip(original_names, self._generated_variables))
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = dict(
                zip(self._generated_variables, original_names))
            self._copy_data.columns = self._generated_variables

            # Rewrite the formula in ONE pass.  Substituting name by name
            # re-scans text that already contains placeholders, so a column
            # called 'a' or 'var' would corrupt an earlier 'var_0'.  Names
            # are escaped (they are literals, not regexes), tried longest
            # first, and only matched as whole identifiers.
            lookup = {str(old): new for old, new in self._variables_mapping}
            candidates = sorted((key for key in lookup if key),
                                key=len, reverse=True)
            if self._formula is not None and candidates:
                pattern = re.compile(
                    r'(?<![\w.])(?:{})(?![\w.])'.format(
                        '|'.join(re.escape(key) for key in candidates)))
                self._formula = pattern.sub(
                    lambda found: lookup[found.group(0)], self._formula)

        # Hand the (possibly renamed) data to R.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        self._summary = importr('base').summary
Beispiel #5
0
    def __init__(self, data=None, name='regression', formula=None, var_transform=False, *args, **kwargs):
        """Prepare a linear regression that is fitted with R's ``stats::lm``.

        Args:
            data: Table whose ``.columns`` can be read and reassigned
                (presumably a pandas DataFrame -- confirm against callers).
                A deep copy is made, so the caller's object is never renamed.
            name: Model name forwarded to the base-class initialiser.
            formula: R model formula given as a string; it is wrapped in
                ``Formula`` after any renaming.
            var_transform: When true, every column is renamed to ``var_<i>``
                and the formula is rewritten to use the new names.
            *args: Forwarded to the base-class initialiser.
            **kwargs: Forwarded to the base-class initialiser.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Bridge to the R session.
        self._renv = REnv()
        # Working formula, plus an untouched copy used for reporting.
        self._formula = formula
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        # Optionally replace every column name with an R-safe placeholder
        # (useful when the original names are, e.g., Chinese).
        self._var_transform = var_transform
        if self._var_transform:
            original_names = list(self._copy_data.columns)
            self._generated_variables = [
                'var_{}'.format(position)
                for position in range(len(original_names))
            ]
            self._variables_mapping = list(
                zip(original_names, self._generated_variables))
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = dict(
                zip(self._generated_variables, original_names))
            self._copy_data.columns = self._generated_variables

            # Rewrite the formula in ONE pass.  Substituting name by name
            # re-scans text that already contains placeholders, so a column
            # called 'a' or 'var' would corrupt an earlier 'var_0'.  Names
            # are escaped (they are literals, not regexes), tried longest
            # first, and only matched as whole identifiers.
            lookup = {str(old): new for old, new in self._variables_mapping}
            candidates = sorted((key for key in lookup if key),
                                key=len, reverse=True)
            if self._formula is not None and candidates:
                pattern = re.compile(
                    r'(?<![\w.])(?:{})(?![\w.])'.format(
                        '|'.join(re.escape(key) for key in candidates)))
                self._formula = pattern.sub(
                    lambda found: lookup[found.group(0)], self._formula)

        self._formula = Formula(self._formula)

        self._lm = importr('stats').lm
        self._summary = importr('base').summary
Beispiel #6
0
    def __call__(self):
        """Fit the model and return the result container.

        Calls ``self._lm`` with the stored formula and the copied data
        converted by ``REnv.python_to_r_object``, then stores the converted
        fit under ``'lm'`` and its converted summary under ``'summary'``.

        Returns:
            ``self._result`` after both entries have been stored.

        Raises:
            ValueError: If no data was supplied to the model.
        """
        # Fail with a descriptive error instead of printing and raising a
        # message-less Exception; ValueError is still an Exception subclass.
        if self._copy_data is None:
            raise ValueError('Data is not defined!')

        r_data = REnv.python_to_r_object(self._copy_data)
        lm_obj = self._lm(self._formula, data=r_data)
        self._result['lm'] = self._renv[lm_obj]
        self._result['summary'] = self._renv[self._summary(lm_obj)]

        return self._result
Beispiel #7
0
    def __init__(self, data=None, func=None):
        """Set up an R session for work with the ``boot`` library.

        Args:
            data: Object converted with ``REnv.python_to_r_object`` and bound
                to the R name ``data``.
            func: Text of an R expression; it is assigned to the R name
                ``myfunc``.
        """
        r_session = REnv()
        self._Renv = r_session

        # Load the boot library inside R.
        r_session._R.r('library("boot")')

        # Define the user function in R under the fixed name ``myfunc``.
        assignment = 'myfunc <- {}'.format(func)
        self._func = assignment
        r_session._R.r(assignment)

        # Make the data available to R.
        r_session['data'] = REnv.python_to_r_object(data)
Beispiel #8
0
class PropensityScoreMatching(BasicModel):
    """Propensity-score matching carried out by R's ``MatchIt`` package."""

    def __init__(self, data=None, name='Propensity Score Matching', formula=None, method='nearest',
                 distance = 'logit', var_transform=False, *args, **kwargs):
        """Store the configuration and push the data into R.

        Args:
            data: Table whose ``.columns`` can be read and reassigned
                (presumably a pandas DataFrame -- confirm against callers).
                A deep copy is made, so the caller's object is never renamed.
            name: Model name forwarded to the base-class initialiser.
            formula: R model formula given as a string.
            method: Value passed to ``matchit`` as ``method``.
            distance: Value passed to ``matchit`` as ``distance``.
            var_transform: When true, every column is renamed to ``var_<i>``
                and the formula is rewritten to use the new names.
            *args: Forwarded to the base-class initialiser.
            **kwargs: Forwarded to the base-class initialiser.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Bridge to the R session.
        self._renv = REnv()
        # Matching configuration.
        self._formula = formula
        self._method = method
        self._distance = distance
        # Untouched copy of the formula, used for reporting.
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        # Optionally replace every column name with an R-safe placeholder
        # (useful when the original names are, e.g., Chinese).
        self._var_transform = var_transform
        if self._var_transform:
            original_names = list(self._copy_data.columns)
            self._generated_variables = [
                'var_{}'.format(position)
                for position in range(len(original_names))
            ]
            self._variables_mapping = list(
                zip(original_names, self._generated_variables))
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = dict(
                zip(self._generated_variables, original_names))
            self._copy_data.columns = self._generated_variables
            self._formula = self._rename_in_formula(
                self._formula, self._variables_mapping)

        # Load the MatchIt library in R.
        self._renv._R.r('library("MatchIt")')
        # Hand the (possibly renamed) data to R.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        self._summary = importr('base').summary

    @staticmethod
    def _rename_in_formula(formula, pairs):
        """Return ``formula`` with each original name swapped for its placeholder.

        The replacement is done in a single pass: substituting name by name
        would re-scan text that already contains placeholders, so a column
        called 'a' or 'var' would corrupt an earlier 'var_0'.  Names are
        escaped (they are literals, not regexes), tried longest first, and
        only matched as whole identifiers.

        Args:
            formula: Formula text, or ``None`` (returned unchanged).
            pairs: Iterable of ``(original_name, placeholder)`` tuples.
        """
        lookup = {str(old): new for old, new in pairs}
        candidates = sorted((key for key in lookup if key),
                            key=len, reverse=True)
        if formula is None or not candidates:
            return formula
        pattern = re.compile(
            r'(?<![\w.])(?:{})(?![\w.])'.format(
                '|'.join(re.escape(key) for key in candidates)))
        return pattern.sub(lambda found: lookup[found.group(0)], formula)

    def __call__(self):
        """Run ``matchit`` in R and collect its outputs.

        Stores the R object ``match_it$nn`` under ``'psm'`` and the output of
        ``match.data`` under ``'matched_data'``.

        Returns:
            ``self._result`` after both entries have been stored.

        Raises:
            ValueError: If no data was supplied to the model.
        """
        # Fail with a descriptive error instead of printing and raising a
        # message-less Exception; ValueError is still an Exception subclass.
        if self._copy_data is None:
            raise ValueError('Data is not defined!')

        # Estimate propensity scores and build the matched data set.
        matching_it_str = 'match_it <- matchit({0}, data = mdata, method = "{1}", distance = "{2}")'.format(self._formula, self._method, self._distance)
        self._renv._R.r(matching_it_str)
        self._renv._R.r('psm <- match_it$nn')
        self._result['psm'] = self._renv['psm']

        self._renv._R.r('matched_data <- match.data(match_it,distance ="pscore")')
        self._result['matched_data'] = self._renv['matched_data']

        return self._result

    def __repr__(self):
        """Return the original formula followed by the stored ``'psm'`` entry."""
        dot_line = '-'*80
        title = 'Propensity Score Matching: {}\n'.format(self._origin_formula)
        return ''.join([title,
                        dot_line,'\n',
                        self._result['psm'].__repr__(),'\n',
                        dot_line,'\n'])

    @property
    def matched_data(self):
        """The ``'matched_data'`` entry stored by ``__call__``."""
        return self._result['matched_data']
Beispiel #9
0
class Logistic(BasicModel):
    """Generalised linear model fitted by evaluating ``glm`` in R."""

    def __init__(self, data=None, name='regression', formula=None, var_transform=False, family='binomial(link=logit)', *args, **kwargs):
        """Store the configuration and push the data into R.

        Args:
            data: Table whose ``.columns`` can be read and reassigned
                (presumably a pandas DataFrame -- confirm against callers).
                A deep copy is made, so the caller's object is never renamed.
            name: Model name forwarded to the base-class initialiser.
            formula: R model formula given as a string.
            var_transform: When true, every column is renamed to ``var_<i>``
                and the formula is rewritten to use the new names.
            family: Text inserted as the ``family`` argument of ``glm``.
            *args: Forwarded to the base-class initialiser.
            **kwargs: Forwarded to the base-class initialiser.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Bridge to the R session.
        self._renv = REnv()
        # Working formula, plus an untouched copy used for reporting.
        self._formula = formula
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        self._family = family

        # Optionally replace every column name with an R-safe placeholder
        # (useful when the original names are, e.g., Chinese).
        self._var_transform = var_transform
        if self._var_transform:
            original_names = list(self._copy_data.columns)
            self._generated_variables = [
                'var_{}'.format(position)
                for position in range(len(original_names))
            ]
            self._variables_mapping = list(
                zip(original_names, self._generated_variables))
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = dict(
                zip(self._generated_variables, original_names))
            self._copy_data.columns = self._generated_variables
            self._formula = self._rename_in_formula(
                self._formula, self._variables_mapping)

        # Hand the (possibly renamed) data to R.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        self._summary = importr('base').summary

    @staticmethod
    def _rename_in_formula(formula, pairs):
        """Return ``formula`` with each original name swapped for its placeholder.

        The replacement is done in a single pass: substituting name by name
        would re-scan text that already contains placeholders, so a column
        called 'a' or 'var' would corrupt an earlier 'var_0'.  Names are
        escaped (they are literals, not regexes), tried longest first, and
        only matched as whole identifiers.

        Args:
            formula: Formula text, or ``None`` (returned unchanged).
            pairs: Iterable of ``(original_name, placeholder)`` tuples.
        """
        lookup = {str(old): new for old, new in pairs}
        candidates = sorted((key for key in lookup if key),
                            key=len, reverse=True)
        if formula is None or not candidates:
            return formula
        pattern = re.compile(
            r'(?<![\w.])(?:{})(?![\w.])'.format(
                '|'.join(re.escape(key) for key in candidates)))
        return pattern.sub(lambda found: lookup[found.group(0)], formula)

    def __call__(self):
        """Evaluate ``glm`` in R and collect the fit and its summary.

        The generated R command is printed before it is evaluated.

        Returns:
            ``self._result`` with the converted fit under ``'lm'`` and the
            converted summary under ``'summary'``.

        Raises:
            ValueError: If no data was supplied to the model.
        """
        # Fail with a descriptive error instead of printing and raising a
        # message-less Exception; ValueError is still an Exception subclass.
        if self._copy_data is None:
            raise ValueError('Data is not defined!')

        glm_command = 'glm({0},family={1},data=mdata)'.format(
            self._formula, self._family)
        print(glm_command)
        glm_obj = self._renv._R.r(glm_command)
        self._result['lm'] = self._renv[glm_obj]
        self._result['summary'] = self._renv[self._summary(glm_obj)]

        return self._result

    def __repr__(self):
        """Return the formula, the row count of ``self._data`` and the coefficients."""
        dot_line = '-'*80
        title = 'Binary Response Regression Result: {}\n'.format(self._origin_formula)
        nobs = 'Number of Observation: {}'.format(self._data.shape[0])
        return ''.join([title,
                        nobs,'\n',
                        dot_line,'\n',
                        self.coefs.__repr__(),'\n',
                        dot_line,'\n'])

    @property
    def coefs(self):
        """Coefficient table from the stored summary.

        When ``var_transform`` is on, the table's index is relabelled in
        place: placeholders go back to the original column names and
        ``'(Intercept)'`` becomes ``'常数'``; other labels are kept.
        """
        table = self._result['summary']['coefficients']
        if self._var_transform:
            restored = self._variables_mapping_dict_reversed
            labels = []
            for label in table.index:
                if label in restored:
                    labels.append(restored[label])
                elif label == '(Intercept)':
                    labels.append('常数')
                else:
                    labels.append(label)
            table.index = labels
        return table
Beispiel #10
0
class PropensityScoreMatching(BasicModel):
    """Propensity-score matching carried out by R's ``MatchIt`` package."""

    def __init__(self,
                 data=None,
                 name='Propensity Score Matching',
                 formula=None,
                 method='nearest',
                 distance='logit',
                 var_transform=False,
                 *args,
                 **kwargs):
        """Store the configuration and push the data into R.

        Args:
            data: Table whose ``.columns`` can be read and reassigned
                (presumably a pandas DataFrame -- confirm against callers).
                A deep copy is made, so the caller's object is never renamed.
            name: Model name forwarded to the base-class initialiser.
            formula: R model formula given as a string.
            method: Value passed to ``matchit`` as ``method``.
            distance: Value passed to ``matchit`` as ``distance``.
            var_transform: When true, every column is renamed to ``var_<i>``
                and the formula is rewritten to use the new names.
            *args: Forwarded to the base-class initialiser.
            **kwargs: Forwarded to the base-class initialiser.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Bridge to the R session.
        self._renv = REnv()
        # Matching configuration.
        self._formula = formula
        self._method = method
        self._distance = distance
        # Untouched copy of the formula, used for reporting.
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        # Optionally replace every column name with an R-safe placeholder
        # (useful when the original names are, e.g., Chinese).
        self._var_transform = var_transform
        if self._var_transform:
            original_names = list(self._copy_data.columns)
            self._generated_variables = [
                'var_{}'.format(position)
                for position in range(len(original_names))
            ]
            self._variables_mapping = list(
                zip(original_names, self._generated_variables))
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = dict(
                zip(self._generated_variables, original_names))
            self._copy_data.columns = self._generated_variables
            self._formula = self._rename_in_formula(
                self._formula, self._variables_mapping)

        # Load the MatchIt library in R.
        self._renv._R.r('library("MatchIt")')
        # Hand the (possibly renamed) data to R.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        self._summary = importr('base').summary

    @staticmethod
    def _rename_in_formula(formula, pairs):
        """Return ``formula`` with each original name swapped for its placeholder.

        The replacement is done in a single pass: substituting name by name
        would re-scan text that already contains placeholders, so a column
        called 'a' or 'var' would corrupt an earlier 'var_0'.  Names are
        escaped (they are literals, not regexes), tried longest first, and
        only matched as whole identifiers.

        Args:
            formula: Formula text, or ``None`` (returned unchanged).
            pairs: Iterable of ``(original_name, placeholder)`` tuples.
        """
        lookup = {str(old): new for old, new in pairs}
        candidates = sorted((key for key in lookup if key),
                            key=len, reverse=True)
        if formula is None or not candidates:
            return formula
        pattern = re.compile(
            r'(?<![\w.])(?:{})(?![\w.])'.format(
                '|'.join(re.escape(key) for key in candidates)))
        return pattern.sub(lambda found: lookup[found.group(0)], formula)

    def __call__(self):
        """Run ``matchit`` in R and collect its outputs.

        Stores the R object ``match_it$nn`` under ``'psm'`` and the output of
        ``match.data`` under ``'matched_data'``.

        Returns:
            ``self._result`` after both entries have been stored.

        Raises:
            ValueError: If no data was supplied to the model.
        """
        # Fail with a descriptive error instead of printing and raising a
        # message-less Exception; ValueError is still an Exception subclass.
        if self._copy_data is None:
            raise ValueError('Data is not defined!')

        # Estimate propensity scores and build the matched data set.
        matching_it_str = 'match_it <- matchit({0}, data = mdata, method = "{1}", distance = "{2}")'.format(
            self._formula, self._method, self._distance)
        self._renv._R.r(matching_it_str)
        self._renv._R.r('psm <- match_it$nn')
        self._result['psm'] = self._renv['psm']

        self._renv._R.r(
            'matched_data <- match.data(match_it,distance ="pscore")')
        self._result['matched_data'] = self._renv['matched_data']

        return self._result

    def __repr__(self):
        """Return the original formula followed by the stored ``'psm'`` entry."""
        dot_line = '-' * 80
        title = 'Propensity Score Matching: {}\n'.format(self._origin_formula)
        return ''.join([
            title, dot_line, '\n', self._result['psm'].__repr__(), '\n',
            dot_line, '\n'
        ])

    @property
    def matched_data(self):
        """The ``'matched_data'`` entry stored by ``__call__``."""
        return self._result['matched_data']
Beispiel #11
0
class Logistic(BasicModel):
    """Generalised linear model fitted by evaluating ``glm`` in R."""

    def __init__(self,
                 data=None,
                 name='regression',
                 formula=None,
                 var_transform=False,
                 family='binomial(link=logit)',
                 *args,
                 **kwargs):
        """Store the configuration and push the data into R.

        Args:
            data: Table whose ``.columns`` can be read and reassigned
                (presumably a pandas DataFrame -- confirm against callers).
                A deep copy is made, so the caller's object is never renamed.
            name: Model name forwarded to the base-class initialiser.
            formula: R model formula given as a string.
            var_transform: When true, every column is renamed to ``var_<i>``
                and the formula is rewritten to use the new names.
            family: Text inserted as the ``family`` argument of ``glm``.
            *args: Forwarded to the base-class initialiser.
            **kwargs: Forwarded to the base-class initialiser.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Bridge to the R session.
        self._renv = REnv()
        # Working formula, plus an untouched copy used for reporting.
        self._formula = formula
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        self._family = family

        # Optionally replace every column name with an R-safe placeholder
        # (useful when the original names are, e.g., Chinese).
        self._var_transform = var_transform
        if self._var_transform:
            original_names = list(self._copy_data.columns)
            self._generated_variables = [
                'var_{}'.format(position)
                for position in range(len(original_names))
            ]
            self._variables_mapping = list(
                zip(original_names, self._generated_variables))
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = dict(
                zip(self._generated_variables, original_names))
            self._copy_data.columns = self._generated_variables
            self._formula = self._rename_in_formula(
                self._formula, self._variables_mapping)

        # Hand the (possibly renamed) data to R.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        self._summary = importr('base').summary

    @staticmethod
    def _rename_in_formula(formula, pairs):
        """Return ``formula`` with each original name swapped for its placeholder.

        The replacement is done in a single pass: substituting name by name
        would re-scan text that already contains placeholders, so a column
        called 'a' or 'var' would corrupt an earlier 'var_0'.  Names are
        escaped (they are literals, not regexes), tried longest first, and
        only matched as whole identifiers.

        Args:
            formula: Formula text, or ``None`` (returned unchanged).
            pairs: Iterable of ``(original_name, placeholder)`` tuples.
        """
        lookup = {str(old): new for old, new in pairs}
        candidates = sorted((key for key in lookup if key),
                            key=len, reverse=True)
        if formula is None or not candidates:
            return formula
        pattern = re.compile(
            r'(?<![\w.])(?:{})(?![\w.])'.format(
                '|'.join(re.escape(key) for key in candidates)))
        return pattern.sub(lambda found: lookup[found.group(0)], formula)

    def __call__(self):
        """Evaluate ``glm`` in R and collect the fit and its summary.

        The generated R command is printed before it is evaluated.

        Returns:
            ``self._result`` with the converted fit under ``'lm'`` and the
            converted summary under ``'summary'``.

        Raises:
            ValueError: If no data was supplied to the model.
        """
        # Fail with a descriptive error instead of printing and raising a
        # message-less Exception; ValueError is still an Exception subclass.
        if self._copy_data is None:
            raise ValueError('Data is not defined!')

        glm_command = 'glm({0},family={1},data=mdata)'.format(
            self._formula, self._family)
        print(glm_command)
        glm_obj = self._renv._R.r(glm_command)
        self._result['lm'] = self._renv[glm_obj]
        self._result['summary'] = self._renv[self._summary(glm_obj)]

        return self._result

    def __repr__(self):
        """Return the formula, the row count of ``self._data`` and the coefficients."""
        dot_line = '-' * 80
        title = 'Binary Response Regression Result: {}\n'.format(
            self._origin_formula)
        nobs = 'Number of Observation: {}'.format(self._data.shape[0])
        return ''.join([
            title, nobs, '\n', dot_line, '\n',
            self.coefs.__repr__(), '\n', dot_line, '\n'
        ])

    @property
    def coefs(self):
        """Coefficient table from the stored summary.

        When ``var_transform`` is on, the table's index is relabelled in
        place: placeholders go back to the original column names and
        ``'(Intercept)'`` becomes ``'常数'``; other labels are kept.
        """
        table = self._result['summary']['coefficients']
        if self._var_transform:
            restored = self._variables_mapping_dict_reversed
            labels = []
            for label in table.index:
                if label in restored:
                    labels.append(restored[label])
                elif label == '(Intercept)':
                    labels.append('常数')
                else:
                    labels.append(label)
            table.index = labels
        return table
Beispiel #12
0
class Plm(BasicModel):
    """Panel-data model fitted by evaluating ``plm`` in R."""

    def __init__(self,
                 data=None,
                 id=None,
                 time=None,
                 name='panel data model',
                 formula=None,
                 var_transform=False,
                 model='Fixed',
                 *args,
                 **kwargs):
        """Store the configuration and build the panel data frame in R.

        Args:
            data: Table whose ``.columns`` can be read and reassigned
                (presumably a pandas DataFrame -- confirm against callers).
                A deep copy is made, so the caller's object is never renamed.
            id: Name of the column holding the individual identifier.
            time: Name of the column holding the time identifier.
            name: Model name forwarded to the base-class initialiser.
            formula: R model formula given as a string.
            var_transform: When true, every column is renamed to ``var_<i>``
                and the formula, ``id`` and ``time`` are rewritten to match.
            model: Estimator name.  ``plm`` names are passed through; the
                aliases ``'Fixed'``/``'fe'``, ``'re'`` and ``'pooled'`` are
                translated (case-insensitively) to ``plm``'s own names.
            *args: Forwarded to the base-class initialiser.
            **kwargs: Forwarded to the base-class initialiser.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Bridge to the R session.
        self._renv = REnv()
        # Working formula, plus an untouched copy used for reporting.
        self._formula = formula
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        # Optionally replace every column name with an R-safe placeholder
        # (useful when the original names are, e.g., Chinese).
        self._var_transform = var_transform
        index_id, index_time = id, time
        if self._var_transform:
            original_names = list(self._copy_data.columns)
            self._generated_variables = [
                'var_{}'.format(position)
                for position in range(len(original_names))
            ]
            self._variables_mapping = list(
                zip(original_names, self._generated_variables))
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = dict(
                zip(self._generated_variables, original_names))
            self._copy_data.columns = self._generated_variables
            self._formula = self._rename_in_formula(
                self._formula, self._variables_mapping)

            # The index columns were renamed together with everything else,
            # so pdata.frame() must be given their placeholder names.
            index_id = self._variables_mapping_dict.get(id, id)
            index_time = self._variables_mapping_dict.get(time, time)

        # plm() only understands its own estimator names; translate common
        # aliases (including the default 'Fixed') and leave the rest alone.
        model_names = {
            'fixed': 'within',
            'fe': 'within',
            're': 'random',
            'pooled': 'pooling',
        }
        model_names.update(
            (known, known)
            for known in ('within', 'random', 'ht', 'between', 'pooling',
                          'fd'))
        self._model = model_names.get(str(model).lower(), model)

        # Load the plm library in R.
        self._renv._R.r('library("plm")')
        # Hand the (possibly renamed) data to R.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        # Convert the data to plm's panel format.
        fmt_str = 'pdata <- pdata.frame(mdata, index=c("{0}","{1}"))'
        self._renv._R.r(fmt_str.format(index_id, index_time))

        self._summary = importr('base').summary

    @staticmethod
    def _rename_in_formula(formula, pairs):
        """Return ``formula`` with each original name swapped for its placeholder.

        The replacement is done in a single pass: substituting name by name
        would re-scan text that already contains placeholders, so a column
        called 'a' or 'var' would corrupt an earlier 'var_0'.  Names are
        escaped (they are literals, not regexes), tried longest first, and
        only matched as whole identifiers.

        Args:
            formula: Formula text, or ``None`` (returned unchanged).
            pairs: Iterable of ``(original_name, placeholder)`` tuples.
        """
        lookup = {str(old): new for old, new in pairs}
        candidates = sorted((key for key in lookup if key),
                            key=len, reverse=True)
        if formula is None or not candidates:
            return formula
        pattern = re.compile(
            r'(?<![\w.])(?:{})(?![\w.])'.format(
                '|'.join(re.escape(key) for key in candidates)))
        return pattern.sub(lambda found: lookup[found.group(0)], formula)

    def __call__(self):
        """Evaluate ``plm`` in R and collect the fit and its summary.

        The generated R command is printed before it is evaluated.

        Returns:
            ``self._result`` with the converted fit under ``'plm'`` and the
            converted summary under ``'summary'``.

        Raises:
            ValueError: If no data was supplied to the model.
        """
        # Fail with a descriptive error instead of printing and raising a
        # message-less Exception; ValueError is still an Exception subclass.
        if self._copy_data is None:
            raise ValueError('Data is not defined!')

        plm_command = 'plm({0},data=pdata,model="{1}")'.format(
            self._formula, self._model)
        print(plm_command)
        plm_obj = self._renv._R.r(plm_command)
        self._result['plm'] = self._renv[plm_obj]
        self._result['summary'] = self._renv[self._summary(plm_obj)]

        return self._result

    def __repr__(self):
        """Return the formula, the row count of ``self._data`` and the coefficients."""
        dot_line = '-' * 80
        title = 'Panel Data Result: {}\n'.format(self._origin_formula)
        nobs = 'Number of Observation: {}'.format(self._data.shape[0])
        return ''.join([
            title, nobs, '\n', dot_line, '\n',
            self.coefs.__repr__(), '\n', dot_line, '\n'
        ])

    @property
    def coefs(self):
        """Coefficient table from the stored summary.

        When ``var_transform`` is on, the table's index is relabelled in
        place: placeholders go back to the original column names and
        ``'(Intercept)'`` becomes ``'常数'``; other labels are kept.
        """
        table = self._result['summary']['coefficients']
        if self._var_transform:
            restored = self._variables_mapping_dict_reversed
            labels = []
            for label in table.index:
                if label in restored:
                    labels.append(restored[label])
                elif label == '(Intercept)':
                    labels.append('常数')
                else:
                    labels.append(label)
            table.index = labels
        return table