コード例 #1
0
def _get_segment_specs_no_header_doc(segment_info_yaml_filename):
    """Return segment boundaries paired with their loaded specs.

    The value returned by ``_get_safe_yaml(segment_info_yaml_filename)`` is
    iterated as ``(start_line, end_line, specfile)`` triples. Each
    *specfile* is resolved relative to the folder that contains
    *segment_info_yaml_filename* and then passed to ``load_spec``.

    Returns a list of ``[start_line, end_line, loaded_spec]`` lists, in the
    order the triples were read.
    """
    # Function-scope import retained from the original block.
    import os

    segments = _get_safe_yaml(segment_info_yaml_filename)

    # Spec files are looked up next to the segment info file.
    # NOTE: the previous inline self-check compared against a
    # backslash-separated path; that only holds where "\\" is a path
    # separator, so it raised AssertionError on every call elsewhere.
    folder = os.path.dirname(segment_info_yaml_filename)

    return [[start_line, end_line, load_spec(os.path.join(folder, specfile))]
            for start_line, end_line, specfile in segments]
コード例 #2
0
def get_labelled_rows_no_segments(raw_data_file, yaml_spec_file):
    """Return the labelled rows of *raw_data_file* as a list.

    Rows are produced by ``yield_csv_rows(raw_data_file)`` and labelled by
    ``yield_valid_rows_with_labels`` using the spec that ``load_spec``
    reads from *yaml_spec_file*.
    """
    csv_rows = yield_csv_rows(raw_data_file)
    specs = load_spec(yaml_spec_file)
    # Exhaust the labelling iterator so callers receive a concrete list.
    return [*yield_valid_rows_with_labels(csv_rows, specs)]
コード例 #3
0
def get_labelled_rows_by_segment(raw_data_file, yaml_spec_file, yaml_cfg_file):
    """Return the result of labelling raw rows segment by segment.

    All rows from ``yield_csv_rows(raw_data_file)`` are read into a list,
    the default spec is loaded from *yaml_spec_file*, and the segment specs
    are built from *yaml_cfg_file*. The three values are handed to
    ``_label_raw_rows_by_segment``, whose return value is passed through
    unchanged.
    """
    # Materialise the rows up front, as a list rather than a one-shot iterator.
    rows = [*yield_csv_rows(raw_data_file)]
    defaults = load_spec(yaml_spec_file)
    specs_by_segment = _get_segment_specs_no_header_doc(yaml_cfg_file)
    return _label_raw_rows_by_segment(rows, defaults, specs_by_segment)
コード例 #4
0
import itertools

from common import docstring_to_file

# NOTE: get_unique_labels is a direct query for all unique labels
from database import get_unique_labels

# Placeholder string constant; where it is used is not visible in this section.
FILLER = "<...>"


# ----------------------------------------------------------------------------

# Import-time setup: resolve the input files for one hard-coded data folder.
from common import get_filenames
data_folder = "../data/ind09/"
# get_filenames is unpacked into exactly three values.
# NOTE(review): presumably the CSV data file, the spec file and the segment
# config file, judging by the names - confirm against common.get_filenames.
# NOTE(review): the name `csv` would shadow the standard-library module of
# the same name if that module were imported here.
csv, spec, cfg = get_filenames(data_folder)

# The spec is loaded once, when the module is imported, and kept as a global.
from load_spec import load_spec
default_dicts = load_spec(spec)

# ----------------------------------------------------------------------------

def get_var_abbr(name):
    """Return the leading all-uppercase words of *name*, joined by '_'.

    *name* is split on '_' and words are kept up to, but not including, the
    first word for which ``str.isupper`` is false. If the first word is not
    uppercase the result is an empty string.

    >>> get_var_abbr('PROD_E_TWh')
    'PROD_E'
    """
    words = name.split('_')
    # str.isupper is passed directly; it is the same predicate as the former
    # ``lambda word: word.isupper()``.
    return '_'.join(itertools.takewhile(str.isupper, words))
# Import-time self-check of the example above.
assert get_var_abbr('PROD_E_TWh') == 'PROD_E'

def get_unit_abbr(name):
    """Return the words of *name* after its leading all-uppercase words.

    *name* is split on '_'; leading words for which ``str.isupper`` is true
    are dropped and the remaining words are joined by '_'. If every word is
    uppercase the result is an empty string.

    >>> get_unit_abbr('PROD_E_TWh')
    'TWh'
    """
    words = name.split('_')
    # str.isupper is passed directly; it is the same predicate as the former
    # ``lambda word: word.isupper()``.
    return '_'.join(itertools.dropwhile(str.isupper, words))
# Import-time self-check of the example above.
assert get_unit_abbr('PROD_E_TWh') == 'TWh'

# ----------------------------------------------------------------------------