Ejemplo n.º 1
0
    def __init__(self, data=None, name='regression', formula=None, var_transform=False, *args, **kwargs):
        """Prepare a regression that is evaluated through R.

        Args:
            data: Tabular data exposing a ``columns`` attribute (presumably a
                pandas DataFrame -- confirm against callers). It is passed to
                the parent initializer and deep-copied into
                ``self._copy_data`` so that renaming columns never touches
                the caller's object.
            name: Label passed to the parent initializer.
            formula: Model formula as a string; it is wrapped in ``Formula``
                at the end of initialization.
            var_transform: When true, every column is renamed to
                ``var_<position>`` and the formula is rewritten to use the
                generated names. Meant for column names that are awkward on
                the R side, e.g. Chinese ones.
            *args: Extra positional arguments for the parent initializer.
            **kwargs: Extra keyword arguments for the parent initializer.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Create the REnv instance.
        self._renv = REnv()
        # Working formula (may be rewritten below) and the untouched original.
        self._formula = formula
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        # Rename variables, in particular those whose names are Chinese.
        self._var_transform = var_transform
        if self._var_transform:
            self._variables_mapping = [
                (column, 'var_{}'.format(position))
                for position, column in enumerate(self._copy_data.columns)
            ]
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = {
                generated: column
                for column, generated in self._variables_mapping
            }
            self._generated_variables = [
                generated for _, generated in self._variables_mapping
            ]
            self._copy_data.columns = self._generated_variables

            if self._variables_mapping_dict:
                # Rewrite the formula in ONE pass:
                #  * names are escaped, so characters such as '.', '(' or '+'
                #    in a column name are matched literally, not as regex;
                #  * longer names are tried first, so a column whose name is
                #    a prefix of another cannot steal the match;
                #  * the look-arounds stop a name from matching inside a
                #    longer identifier (R identifiers may contain '.');
                #  * a single pass never re-scans the generated 'var_N'
                #    tokens, which sequential substitution would do.
                alternatives = '|'.join(
                    re.escape(column)
                    for column in sorted(
                        self._variables_mapping_dict, key=len, reverse=True)
                )
                pattern = re.compile(
                    r'(?<![\w.])(?:{})(?![\w.])'.format(alternatives))
                self._formula = pattern.sub(
                    lambda match: self._variables_mapping_dict[match.group(0)],
                    self._formula)

        self._formula = Formula(self._formula)

        # R callables used later: stats::lm and base::summary.
        self._lm = importr('stats').lm
        self._summary = importr('base').summary
Ejemplo n.º 2
0
    def __init__(self, data=None, name='Propensity Score Matching', formula=None, method='nearest',
                 distance='logit', var_transform=False, *args, **kwargs):
        """Prepare a propensity-score-matching run backed by R's MatchIt.

        Args:
            data: Tabular data exposing a ``columns`` attribute (presumably a
                pandas DataFrame -- confirm against callers). It is passed to
                the parent initializer and deep-copied into
                ``self._copy_data`` so that renaming columns never touches
                the caller's object.
            name: Label passed to the parent initializer.
            formula: Model formula as a string; it is kept as a string (not
                wrapped in ``Formula``).
            method: Stored in ``self._method``; presumably the MatchIt
                matching method -- confirm where it is consumed.
            distance: Stored in ``self._distance``; presumably the MatchIt
                distance measure -- confirm where it is consumed.
            var_transform: When true, every column is renamed to
                ``var_<position>`` and the formula is rewritten to use the
                generated names. Meant for column names that are awkward on
                the R side, e.g. Chinese ones.
            *args: Extra positional arguments for the parent initializer.
            **kwargs: Extra keyword arguments for the parent initializer.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Create the REnv instance.
        self._renv = REnv()
        # Matching specification: formula, method and distance.
        self._formula = formula
        self._method = method
        self._distance = distance
        # Untouched copy of the formula as supplied by the caller.
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        # Rename variables, in particular those whose names are Chinese.
        self._var_transform = var_transform
        if self._var_transform:
            self._variables_mapping = [
                (column, 'var_{}'.format(position))
                for position, column in enumerate(self._copy_data.columns)
            ]
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = {
                generated: column
                for column, generated in self._variables_mapping
            }
            self._generated_variables = [
                generated for _, generated in self._variables_mapping
            ]
            self._copy_data.columns = self._generated_variables

            if self._variables_mapping_dict:
                # Rewrite the formula in ONE pass:
                #  * names are escaped, so characters such as '.', '(' or '+'
                #    in a column name are matched literally, not as regex;
                #  * longer names are tried first, so a column whose name is
                #    a prefix of another cannot steal the match;
                #  * the look-arounds stop a name from matching inside a
                #    longer identifier (R identifiers may contain '.');
                #  * a single pass never re-scans the generated 'var_N'
                #    tokens, which sequential substitution would do.
                alternatives = '|'.join(
                    re.escape(column)
                    for column in sorted(
                        self._variables_mapping_dict, key=len, reverse=True)
                )
                pattern = re.compile(
                    r'(?<![\w.])(?:{})(?![\w.])'.format(alternatives))
                self._formula = pattern.sub(
                    lambda match: self._variables_mapping_dict[match.group(0)],
                    self._formula)

        # Load the MatchIt library in the R session.
        self._renv._R.r('library("MatchIt")')
        # Convert the (possibly renamed) data to an R object named 'mdata'.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        self._summary = importr('base').summary
Ejemplo n.º 3
0
    def __init__(self,
                 data=None,
                 id=None,
                 time=None,
                 name='panel data model',
                 formula=None,
                 var_transform=False,
                 model='Fixed',
                 *args,
                 **kwargs):
        """Prepare a panel-data model backed by R's plm package.

        Args:
            data: Tabular data exposing a ``columns`` attribute (presumably a
                pandas DataFrame -- confirm against callers). It is passed to
                the parent initializer and deep-copied into
                ``self._copy_data`` so that renaming columns never touches
                the caller's object.
            id: Name of the column identifying the individual; used as the
                first element of the ``pdata.frame`` index.
            time: Name of the column identifying the period; used as the
                second element of the ``pdata.frame`` index.
            name: Label passed to the parent initializer.
            formula: Model formula as a string; it is kept as a string (not
                wrapped in ``Formula``).
            var_transform: When true, every column is renamed to
                ``var_<position>`` and the formula, ``id`` and ``time`` are
                translated to the generated names. Meant for column names
                that are awkward on the R side, e.g. Chinese ones.
            model: Stored in ``self._model``; presumably selects the
                estimator later -- confirm where it is consumed.
            *args: Extra positional arguments for the parent initializer.
            **kwargs: Extra keyword arguments for the parent initializer.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Create the REnv instance.
        self._renv = REnv()
        # Working formula (may be rewritten below) and the untouched original.
        self._formula = formula
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        # By default the panel index uses the names exactly as given.
        index_id, index_time = id, time

        # Rename variables, in particular those whose names are Chinese.
        self._var_transform = var_transform
        if self._var_transform:
            self._variables_mapping = [
                (column, 'var_{}'.format(position))
                for position, column in enumerate(self._copy_data.columns)
            ]
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = {
                generated: column
                for column, generated in self._variables_mapping
            }
            self._generated_variables = [
                generated for _, generated in self._variables_mapping
            ]
            self._copy_data.columns = self._generated_variables

            if self._variables_mapping_dict:
                # Rewrite the formula in ONE pass:
                #  * names are escaped, so characters such as '.', '(' or '+'
                #    in a column name are matched literally, not as regex;
                #  * longer names are tried first, so a column whose name is
                #    a prefix of another cannot steal the match;
                #  * the look-arounds stop a name from matching inside a
                #    longer identifier (R identifiers may contain '.');
                #  * a single pass never re-scans the generated 'var_N'
                #    tokens, which sequential substitution would do.
                alternatives = '|'.join(
                    re.escape(column)
                    for column in sorted(
                        self._variables_mapping_dict, key=len, reverse=True)
                )
                pattern = re.compile(
                    r'(?<![\w.])(?:{})(?![\w.])'.format(alternatives))
                self._formula = pattern.sub(
                    lambda match: self._variables_mapping_dict[match.group(0)],
                    self._formula)

            # The data sent to R carries the generated names, so the panel
            # index must refer to them too; the original names no longer
            # exist in 'mdata'.
            index_id = self._variables_mapping_dict.get(id, id)
            index_time = self._variables_mapping_dict.get(time, time)

        self._model = model

        # Load the plm library in the R session.
        self._renv._R.r('library("plm")')
        # Convert the (possibly renamed) data to an R object named 'mdata'.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        # Turn 'mdata' into a panel data frame named 'pdata'.
        fmt_str = 'pdata <- pdata.frame(mdata, index=c("{0}","{1}"))'
        self._renv._R.r(fmt_str.format(index_id, index_time))

        self._summary = importr('base').summary
Ejemplo n.º 4
0
    def __init__(self,
                 data=None,
                 name='Propensity Score Matching',
                 formula=None,
                 method='nearest',
                 distance='logit',
                 var_transform=False,
                 *args,
                 **kwargs):
        """Prepare a propensity-score-matching run backed by R's MatchIt.

        Args:
            data: Tabular data exposing a ``columns`` attribute (presumably a
                pandas DataFrame -- confirm against callers). It is passed to
                the parent initializer and deep-copied into
                ``self._copy_data`` so that renaming columns never touches
                the caller's object.
            name: Label passed to the parent initializer.
            formula: Model formula as a string; it is kept as a string (not
                wrapped in ``Formula``).
            method: Stored in ``self._method``; presumably the MatchIt
                matching method -- confirm where it is consumed.
            distance: Stored in ``self._distance``; presumably the MatchIt
                distance measure -- confirm where it is consumed.
            var_transform: When true, every column is renamed to
                ``var_<position>`` and the formula is rewritten to use the
                generated names. Meant for column names that are awkward on
                the R side, e.g. Chinese ones.
            *args: Extra positional arguments for the parent initializer.
            **kwargs: Extra keyword arguments for the parent initializer.
        """
        super().__init__(data=data, name=name, *args, **kwargs)

        # Create the REnv instance.
        self._renv = REnv()
        # Matching specification: formula, method and distance.
        self._formula = formula
        self._method = method
        self._distance = distance
        # Untouched copy of the formula as supplied by the caller.
        self._origin_formula = self._formula
        self._copy_data = deepcopy(data)

        # Rename variables, in particular those whose names are Chinese.
        self._var_transform = var_transform
        if self._var_transform:
            self._variables_mapping = [
                (column, 'var_{}'.format(position))
                for position, column in enumerate(self._copy_data.columns)
            ]
            self._variables_mapping_dict = dict(self._variables_mapping)
            self._variables_mapping_dict_reversed = {
                generated: column
                for column, generated in self._variables_mapping
            }
            self._generated_variables = [
                generated for _, generated in self._variables_mapping
            ]
            self._copy_data.columns = self._generated_variables

            if self._variables_mapping_dict:
                # Rewrite the formula in ONE pass:
                #  * names are escaped, so characters such as '.', '(' or '+'
                #    in a column name are matched literally, not as regex;
                #  * longer names are tried first, so a column whose name is
                #    a prefix of another cannot steal the match;
                #  * the look-arounds stop a name from matching inside a
                #    longer identifier (R identifiers may contain '.');
                #  * a single pass never re-scans the generated 'var_N'
                #    tokens, which sequential substitution would do.
                alternatives = '|'.join(
                    re.escape(column)
                    for column in sorted(
                        self._variables_mapping_dict, key=len, reverse=True)
                )
                pattern = re.compile(
                    r'(?<![\w.])(?:{})(?![\w.])'.format(alternatives))
                self._formula = pattern.sub(
                    lambda match: self._variables_mapping_dict[match.group(0)],
                    self._formula)

        # Load the MatchIt library in the R session.
        self._renv._R.r('library("MatchIt")')
        # Convert the (possibly renamed) data to an R object named 'mdata'.
        self._renv['mdata'] = self._renv.python_to_r_object(self._copy_data)
        self._summary = importr('base').summary
Ejemplo n.º 5
0
    # Manual demo of the Plm wrapper. The triple-quoted string below holds an
    # earlier variant of the demo (JTRAIN data set) that was disabled by
    # turning it into a string literal; it is not executed.
    '''
    stata_file = r'D:\data\test\JTRAIN.dta'
    stdata = Statadata(stata_file)
    mdata = stdata.read()
    print(mdata.columns)
    plm = importr('plm')

    reg_obj = Plm(data=mdata, formula='log(scrap)~d88+d89+grant+grant_1', id='fcode', time='year')
    call_obj = reg_obj()
    print(reg_obj.coefs)
    print(reg_obj._result['summary']['coefficients'])
    print(reg_obj)'''

    # Read the wagepan Stata file from a hard-coded local Windows path and
    # show which columns it contains.
    stata_file = r'D:\data\test\wagepan.dta'
    stdata = Statadata(stata_file)
    mdata = stdata.read()
    print(mdata.columns)
    # Import the R package 'plm'; the returned handle is not used in the
    # lines visible here.
    plm = importr('plm')

    # Build the panel model with 'nr' as the individual index and 'year' as
    # the time index, passing model='random'.
    reg_obj = Plm(
        data=mdata,
        formula='lwage~educ+black+hisp+exper+I(exper^2)+married+union+yr',
        id='nr',
        time='year',
        model='random')
    # The formula refers to 'yr', which is created here directly in the R
    # session as a factor version of pdata$year. This reaches into the
    # wrapper's private R handle and must run before the model is called.
    reg_obj._renv._R.r('pdata$yr<-factor(pdata$year)')
    # Calling the object presumably performs the estimation -- confirm in
    # Plm.__call__; the return value is not used below.
    call_obj = reg_obj()
    print(reg_obj.coefs)
    print(reg_obj._result['summary']['coefficients'])
    print(reg_obj)