def code_labels(cls, implicit: List[str] = ['iso_country']) -> tab.DataFrame:
    """Collect the code/label tables of every coding scheme into one frame.

    For each distinct value of ``cls.field_code_scheme.scheme`` that is not
    listed in *implicit*, the file ``<scheme>.csv`` is read from the
    directory yielded by ``cls._code_labels()`` and tagged with a ``scheme``
    column holding the file stem.  The per-scheme frames are concatenated,
    then merged with ``cls.field_code_scheme`` and with the ``item`` and
    ``name`` columns of ``cls.field_info``.

    Args:
        implicit: Scheme names to skip; no CSV file is opened for them.
            The default list is only read here, never mutated.

    Returns:
        The concatenated and merged frame.

    Raises:
        ValueError: If a scheme file yields a frame without a ``code`` or
            ``label`` column.  The exception argument is
            ``(path, columns)``.
    """
    schema: tab.Schema = {'columns': [
        {'number': 1, 'name': "code", 'datatype': 'string', 'null': []},
        {'number': 2, 'name': "label", 'datatype': 'string', 'null': []},
        {'number': 3, 'name': "means_missing", 'datatype': 'boolean',
         'null': []},
        {'number': 4, 'name': "description", 'datatype': 'string',
         'null': ['']},
    ]}
    found = []
    with cls._code_labels() as cl_dir:
        for scheme in cls.field_code_scheme.scheme.unique():
            if scheme in implicit:
                continue
            info = (cl_dir / str(scheme)).with_suffix('.csv')

            # Peek at the first line inside a `with` so the handle is closed
            # immediately instead of being leaked once per scheme.
            with info.open() as peek:
                first_line = peek.readline()
            # A first line starting with '#' is skipped when reading.
            skiprows = 1 if first_line.startswith('#') else 0

            codes = tab.read_csv(info, schema=schema, skiprows=skiprows)
            # Validate before touching `codes.code`, so a malformed file is
            # reported through the ValueError below.
            if 'code' not in codes.columns or 'label' not in codes.columns:
                raise ValueError((info, codes.columns))
            codes = codes.withColumn('scheme', codes.code.const(info.stem))
            found.append(codes)

        # NOTE(review): combining inside the `with` keeps the directory
        # available for the whole computation; confirm whether that is needed.
        all_schemes = tab.concat(found)
        with_fields = all_schemes.merge(cls.field_code_scheme)
        with_field_info = with_fields.merge(
            cls.field_info.select('item', 'name'))
        return with_field_info
def main(argv: List[str], cwd: Path_T, connect: Callable[..., Connection]) -> None:
    """Load CSV files into a database session and log a short preview.

    ``argv[1]`` is handed to *connect* together with
    ``detect_types=PARSE_COLNAMES``; the resulting connection is wrapped in
    a ``DBSession``.  Each remaining argument is resolved against *cwd*,
    read with ``tab.read_csv`` and loaded under the file's stem.  The loaded
    frame is logged, followed by its rows until a row index of 3 or more is
    encountered.

    Args:
        argv: Command-line style arguments: program name, database, then
            zero or more CSV file names.
        cwd: Base path that the CSV file names are joined onto.
        connect: Callable that opens the database connection.
    """
    database, csv_names = argv[1], argv[2:]
    connection = connect(database, detect_types=PARSE_COLNAMES)
    session = DBSession(connection)

    for csv_name in csv_names:
        path = cwd / csv_name
        loaded = session.load_data_frame(path.stem, tab.read_csv(path))
        log.info('%s -> %s', path, loaded)

        for index, record in loaded.iterrows():
            # Preview only: stop at the first row whose index reaches 3.
            if index >= 3:
                break
            log.info('row %d: %s', index, record)
def read_csv(self, access: Path_T) -> 'DataFrame':
    """Read a CSV file and load it under the file's stem.

    Args:
        access: Path of the CSV file; it is read with ``tab.read_csv`` and
            its ``stem`` is passed as the first argument to
            ``self.load_data_frame``.

    Returns:
        Whatever ``self.load_data_frame`` returns for the parsed frame.
    """
    parsed = tab.read_csv(access)
    table_name = access.stem
    return self.load_data_frame(table_name, parsed)