def _get_segment_specs_no_header_doc(segment_info_yaml_filename):
    """Load segment boundaries and the parsed spec attached to each segment.

    The document returned by ``_get_safe_yaml`` is iterated as a sequence of
    ``(start_line, end_line, specfile)`` triples. Every ``specfile`` is
    resolved relative to the folder that contains
    *segment_info_yaml_filename* and then parsed with ``load_spec``.

    Args:
        segment_info_yaml_filename: path to the YAML file listing segments.

    Returns:
        A list of ``[start_line, end_line, spec]`` lists, where ``spec`` is
        the value ``load_spec`` returns for the resolved spec file.
    """
    # Function-scope import: the file-level import block is not visible here.
    import os

    segments = _get_safe_yaml(segment_info_yaml_filename)

    # Spec files are looked up next to the segment config file. The folder is
    # the same for every segment, so it is derived once. No hard-coded
    # backslash path check is performed here: such a check only holds on
    # Windows and would raise AssertionError on every call elsewhere.
    folder = os.path.dirname(segment_info_yaml_filename)

    return [
        [start_line, end_line, load_spec(os.path.join(folder, specfile))]
        for start_line, end_line, specfile in segments
    ]
def get_labelled_rows_no_segments(raw_data_file, yaml_spec_file):
    """Return labelled rows for a raw data file using one specification.

    The rows produced by ``yield_csv_rows(raw_data_file)`` and the result of
    ``load_spec(yaml_spec_file)`` are handed to
    ``yield_valid_rows_with_labels``; its output is collected into a list.

    Args:
        raw_data_file: passed unchanged to ``yield_csv_rows``.
        yaml_spec_file: passed unchanged to ``load_spec``.

    Returns:
        list of all items yielded by ``yield_valid_rows_with_labels``.
    """
    # Arguments are evaluated left to right: row source first, then the spec.
    return list(
        yield_valid_rows_with_labels(
            yield_csv_rows(raw_data_file),
            load_spec(yaml_spec_file),
        )
    )
def get_labelled_rows_by_segment(raw_data_file, yaml_spec_file, yaml_cfg_file):
    """Return labelled rows, using segment-specific specs from a config file.

    Args:
        raw_data_file: passed to ``yield_csv_rows``; its rows are fully
            materialised into a list before labelling.
        yaml_spec_file: passed to ``load_spec`` to obtain the default spec.
        yaml_cfg_file: passed to ``_get_segment_specs_no_header_doc`` to
            obtain the per-segment specs.

    Returns:
        Whatever ``_label_raw_rows_by_segment`` returns for the materialised
        rows, the default spec and the segment specs.
    """
    # Arguments are evaluated left to right: rows, default spec, segment specs.
    return _label_raw_rows_by_segment(
        list(yield_csv_rows(raw_data_file)),
        load_spec(yaml_spec_file),
        _get_segment_specs_no_header_doc(yaml_cfg_file),
    )
from common import docstring_to_file

# NOTE: this function is a direct query to all unique labels
from database import get_unique_labels

FILLER = "<...>"

# ----------------------------------------------------------------------------
# Locate the data files and load the default specification at import time.

from common import get_filenames

data_folder = "../data/ind09/"
# NOTE(review): `csv` is bound to a value returned by get_filenames(); it
# would shadow the stdlib `csv` module if that is imported in this file.
csv, spec, cfg = get_filenames(data_folder)

from load_spec import load_spec

default_dicts = load_spec(spec)

# ----------------------------------------------------------------------------
# NOTE(review): `itertools` is used below but is not imported in this section;
# confirm it is imported earlier in the file.


def get_var_abbr(name):
    """Return the leading all-uppercase words of *name*, joined by '_'.

    *name* is split on '_' and words are taken from the start for as long
    as ``str.isupper`` is true for them.
    """
    parts = name.split('_')
    leading_upper = itertools.takewhile(str.isupper, parts)
    return '_'.join(leading_upper)


assert get_var_abbr('PROD_E_TWh') == 'PROD_E'


def get_unit_abbr(name):
    """Return what is left of *name* after its leading all-uppercase words.

    *name* is split on '_'; leading words for which ``str.isupper`` is true
    are skipped and every remaining word is joined by '_'.
    """
    parts = name.split('_')
    remainder = itertools.dropwhile(str.isupper, parts)
    return '_'.join(remainder)


assert get_unit_abbr('PROD_E_TWh') == 'TWh'

# ----------------------------------------------------------------------------