Exemple #1
0
    def __init__(self,
                 variables,
                 missing=np.nan,
                 case_sensitive=False,
                 force_retrieve=False):

        self.case_sensitive = case_sensitive

        if isinstance(variables, (list, tuple)):
            _vars = {}
            for v in variables:
                v = v.split('/')
                if v[0] not in _vars:
                    _vars[v[0]] = []
                if len(v) == 2:
                    _vars[v[0]].append(v[1])
            variables = _vars

        dicts = []
        for k, v in variables.items():
            d = fetch_dictionary(k, force_retrieve=force_retrieve)
            if not case_sensitive:
                d.index = d.index.str.lower()
            if v:
                d = d[v]
            d.columns = ['{}_{}'.format(k, c) for c in d.columns]
            dicts.append(d)

        # Make sure none of the dictionaries have duplicate indices
        drop_dups = lambda d: d[~d.index.duplicated(keep='first')]
        dicts = [d if d.index.is_unique else drop_dups(d) for d in dicts]

        dictionary = pd.concat(dicts, axis=1, join='outer', sort=False)

        super().__init__(dictionary, missing=missing)
Exemple #2
0
    def __init__(self, variables, missing=np.nan, case_sensitive=True):

        self.case_sensitive = case_sensitive

        if isinstance(variables, (list, tuple)):
            _vars = {}
            for v in variables:
                v = v.split('/')
                if v[0] not in _vars:
                    _vars[v[0]] = []
                if len(v) == 2:
                    _vars[v[0]].append(v[1])
            variables = _vars

        dicts = []
        for k, v in variables.items():
            d = fetch_dictionary(k)
            if not case_sensitive:
                d.index = d.index.str.lower()
            if v:
                d = d[v]
            d.columns = ['%s_%s' % (k, c) for c in d.columns]
            dicts.append(d)

        dictionary = pd.concat(dicts, axis=1, join='outer')
        super(PredefinedDictionaryExtractor, self).__init__(dictionary,
                                                            missing=missing)