def __init__(self, variables, missing=np.nan, case_sensitive=False,
             force_retrieve=False):
    """Assemble one combined lookup table from predefined dictionaries.

    Args:
        variables: Either a mapping of dictionary name -> list of column
            names to keep (an empty list keeps every column), or a
            list/tuple of strings written as ``'name'`` or
            ``'name/column'``. Strings that split into more than two
            ``/``-separated parts register the dictionary name only.
        missing: Passed through unchanged to the parent initializer.
        case_sensitive: When false, the index of every fetched dictionary
            is lower-cased before merging.
        force_retrieve: Passed through to ``fetch_dictionary``.
    """
    self.case_sensitive = case_sensitive

    # Normalize the list/tuple form into {dictionary name: [columns]}.
    if isinstance(variables, (list, tuple)):
        requested = {}
        for spec in variables:
            parts = spec.split('/')
            columns = requested.setdefault(parts[0], [])
            if len(parts) == 2:
                columns.append(parts[1])
        variables = requested

    def _first_occurrences(frame):
        # Keep only the first row for each repeated index label.
        return frame[~frame.index.duplicated(keep='first')]

    frames = []
    for name, columns in variables.items():
        # NOTE(review): fetch_dictionary presumably returns a pandas
        # DataFrame with a string index - confirm against its definition.
        frame = fetch_dictionary(name, force_retrieve=force_retrieve)
        if not case_sensitive:
            frame.index = frame.index.str.lower()
        if columns:
            frame = frame[columns]
        # Prefix each column with its dictionary name so columns coming
        # from different dictionaries stay distinguishable after merging.
        frame.columns = ['{}_{}'.format(name, col) for col in frame.columns]
        frames.append(frame)

    # Every frame needs a unique index before being joined column-wise.
    unique_frames = []
    for frame in frames:
        if not frame.index.is_unique:
            frame = _first_occurrences(frame)
        unique_frames.append(frame)

    dictionary = pd.concat(unique_frames, axis=1, join='outer', sort=False)
    super().__init__(dictionary, missing=missing)
def __init__(self, variables, missing=np.nan, case_sensitive=True):
    """Assemble one combined lookup table from predefined dictionaries.

    Args:
        variables: Either a mapping of dictionary name -> list of column
            names to keep (an empty list keeps every column), or a
            list/tuple of strings written as ``'name'`` or
            ``'name/column'``.
        missing: Passed through unchanged to the parent initializer.
        case_sensitive: When false, the index of every fetched dictionary
            is lower-cased before merging.
    """
    self.case_sensitive = case_sensitive

    # Normalize the list/tuple form into {dictionary name: [columns]}.
    if isinstance(variables, (list, tuple)):
        _vars = {}
        for v in variables:
            v = v.split('/')
            selected = _vars.setdefault(v[0], [])
            if len(v) == 2:
                selected.append(v[1])
        variables = _vars

    dicts = []
    for k, v in variables.items():
        # NOTE(review): fetch_dictionary presumably returns a pandas
        # DataFrame with a string index - confirm against its definition.
        d = fetch_dictionary(k)
        if not case_sensitive:
            d.index = d.index.str.lower()
        if v:
            d = d[v]
        # Prefix each column with its dictionary name so columns coming
        # from different dictionaries stay distinguishable after merging.
        d.columns = ['%s_%s' % (k, c) for c in d.columns]
        # Lower-casing (or the source data itself) can leave repeated index
        # labels, and a column-wise outer concat cannot align frames on a
        # non-unique index. Keep the first row for each label.
        if not d.index.is_unique:
            d = d[~d.index.duplicated(keep='first')]
        dicts.append(d)

    dictionary = pd.concat(dicts, axis=1, join='outer')
    super(PredefinedDictionaryExtractor, self).__init__(
        dictionary, missing=missing)