def code_labels(cls,
                implicit: List[str] = ['iso_country']) -> tab.DataFrame:
    """Collect the code/label tables of every explicitly coded scheme.

    For each distinct scheme in ``cls.field_code_scheme`` that is not
    listed in ``implicit``, the file ``<scheme>.csv`` inside the directory
    yielded by ``cls._code_labels()`` is read with a fixed four-column
    schema (code, label, means_missing, description), tagged with a
    ``scheme`` column derived from the file stem, and collected.  The
    collected tables are concatenated, merged with
    ``cls.field_code_scheme`` and then with the ``item``/``name`` columns
    of ``cls.field_info``.

    Args:
        implicit: Scheme names for which no CSV is read.  The list is
            only tested for membership, never mutated, so the shared
            default is safe.

    Returns:
        The result of the two merges described above.

    Raises:
        ValueError: If a scheme file yields a table without a ``code`` or
            ``label`` column; the argument is ``(path, columns)``.
    """
    found = []
    schema: tab.Schema = {'columns': [
        {'number': 1, 'name': "code", 'datatype': 'string', 'null': []},
        {'number': 2, 'name': "label", 'datatype': 'string', 'null': []},
        {'number': 3, 'name': "means_missing", 'datatype': 'boolean', 'null': []},
        {'number': 4, 'name': "description", 'datatype': 'string', 'null': ['']}]}
    with cls._code_labels() as cl_dir:
        for scheme in cls.field_code_scheme.scheme.unique():
            if scheme in implicit:
                continue
            info = (cl_dir / str(scheme)).with_suffix('.csv')
            # Peek at the first line to detect a leading '#' comment line.
            # Use a context manager so the handle is closed right away
            # instead of lingering until garbage collection.
            with info.open() as head:
                has_comment_line = head.readline().startswith('#')
            skiprows = 1 if has_comment_line else 0
            codes = tab.read_csv(info, schema=schema, skiprows=skiprows)
            # Validate before touching ``codes.code`` below; otherwise a
            # missing 'code' column would fail on the attribute access and
            # this diagnostic could never be raised for it.
            if 'code' not in codes.columns or 'label' not in codes.columns:
                raise ValueError((info, codes.columns))
            codes = codes.withColumn('scheme', codes.code.const(info.stem))
            found.append(codes)
    all_schemes = tab.concat(found)
    with_fields = all_schemes.merge(cls.field_code_scheme)
    with_field_info = with_fields.merge(cls.field_info.select('item', 'name'))
    return with_field_info
Exemple #2
0
def main(argv: List[str], cwd: Path_T, connect: Callable[...,
                                                         Connection]) -> None:
    """Load CSV files into a database session and log a short preview.

    ``argv[1]`` is handed to ``connect`` (together with
    ``detect_types=PARSE_COLNAMES``) and the resulting connection is
    wrapped in a ``DBSession``.  Every remaining argument is resolved
    against ``cwd``, read with ``tab.read_csv`` and loaded under the
    file's stem.  For each loaded frame, rows are logged until the first
    row whose index is 3 or greater.
    """
    session = DBSession(connect(argv[1], detect_types=PARSE_COLNAMES))

    for file_name in argv[2:]:
        csv_path = cwd / file_name
        parsed = tab.read_csv(csv_path)
        frame = session.load_data_frame(csv_path.stem, parsed)
        log.info('%s -> %s', csv_path, frame)

        # Preview only: stop at the first row index that reaches 3.
        for ix, row in frame.iterrows():
            if ix >= 3:
                break
            log.info('row %d: %s', ix, row)
Exemple #3
0
 def read_csv(self, access: Path_T) -> 'DataFrame':
     """Read the CSV at ``access`` and load it under the file's stem.

     The file is parsed with ``tab.read_csv`` and the result is passed to
     ``self.load_data_frame`` together with ``access.stem``; whatever that
     call returns is returned unchanged.
     """
     parsed = tab.read_csv(access)
     table_name = access.stem
     return self.load_data_frame(table_name, parsed)