def get_dataframes(csvfile, spec):
    """Build dataframes from *csvfile* following *spec* parsing instructions.

    Args:
        csvfile: file connection or StringIO with the CSV data to parse.
        spec: parsing instructions (spec.Specification). This signature
            has no default value, so the caller must always supply it.

    Returns:
        Tuple ``(dfa, dfq, dfm)`` of the dataframes emitted for the
        frequencies 'a', 'q' and 'm', in that order.
    """
    # Reader.items() yields pairs of:
    #   csv_segment - list of reader.Row instances
    #   pdef        - parsing definition (specification.Definition instance)
    # Every segment is converted to Table() objects; variable names and
    # units are identified for each table during extraction.
    parsed_tables = [
        table
        for csv_segment, pdef in Reader(csvfile, spec).items()
        for table in extract_tables(csv_segment, pdef)
    ]
    # One emitter serves all three frequencies.
    emitter = Emitter(parsed_tables)
    return tuple(emitter.get_dataframe(freq=code) for code in ('a', 'q', 'm'))
def get_dataframes(csvfile, spec=SPEC):
    """Parse *csvfile* with *spec* and return dataframes at three frequencies.

    Args:
        csvfile: CSV source handed to ``Reader``.
        spec: parsing instructions handed to ``Reader``; defaults to SPEC.

    Returns:
        Tuple of three dataframes obtained from ``Emitter.get_dataframe``
        for freq='a', freq='q' and freq='m', in that order.
    """
    # Collect the tables of every (segment, parsing definition) pair
    # into one flat list.
    tables = []
    for csv_segment, pdef in Reader(csvfile, spec).items():
        tables.extend(extract_tables(csv_segment, pdef))

    emitter = Emitter(tables)
    annual = emitter.get_dataframe(freq='a')
    quarterly = emitter.get_dataframe(freq='q')
    monthly = emitter.get_dataframe(freq='m')
    return annual, quarterly, monthly
def get_dataframes(csvfile, spec=SPEC):
    """Return dataframes for frequencies 'a', 'q' and 'm' parsed from *csvfile*.

    Args:
        csvfile: CSV source handed to ``Reader``.
        spec: parsing instructions handed to ``Reader``; defaults to SPEC.

    Returns:
        Tuple with the results of ``Emitter.get_dataframe`` for
        freq='a', freq='q' and freq='m', in that order.
    """
    segments = Reader(csvfile, spec).items()
    # Flatten the tables extracted from every segment into a single list.
    tables = [
        table
        for segment, definition in segments
        for table in extract_tables(segment, definition)
    ]
    make_frame = Emitter(tables).get_dataframe
    return make_frame(freq='a'), make_frame(freq='q'), make_frame(freq='m')
def get_dataframes(path, spec=PARSING_DEFINITION):
    """Extract dataframes from the CSV source *path* using *spec*.

    Args:
        path: CSV source passed on to ``get_segment_with_pdef``.
            NOTE(review): the previous docstring described this argument
            as *csvfile* (file connection or StringIO) - confirm whether
            a filesystem path or a file object is expected here.
        spec: parsing instructions, indexed here by the keys 'default'
            and 'segments'; defaults to PARSING_DEFINITION.

    Returns:
        Dictionary with one entry per item of FREQUENCIES, mapping the
        frequency to the dataframe emitted for it.
    """
    jobs = get_segment_with_pdef(path, spec['default'], spec['segments'])

    # Flatten the tables of every (segment, parsing definition) job.
    tables = []
    for csv_segment, pdef in jobs:
        tables.extend(extract_tables(csv_segment, pdef))

    make_frame = Emitter(tables).get_dataframe
    return {freq: make_frame(freq) for freq in FREQUENCIES}
class Test_extract_tables_function:
    """Checks of ``extract_tables`` applied to the mock CSV rows."""

    # Extracted once, when the class body is executed, and shared by
    # every test method below.
    tables = extract_tables(csv_segment=mock_rows(), pdef=Sample.pdef())

    # FIXME: extract_tables has more functions than splitting into tables;
    #        those are not tested here.

    def test_returns_list(self):
        assert isinstance(self.tables, list)

    def test_table0_is_table_instance(self):
        first = self.tables[0]
        assert isinstance(first, Table)
        assert first == Sample.table(0)

    def test_table0_can_be_parsed_with_label_GDP_bln_rub(self):
        first = self.tables[0]
        varnames = {'Объем ВВП': 'GDP'}
        units = {'млрд.рублей': 'bln_rub'}
        first.set_label(varnames_dict=varnames, units_dict=units)
        assert first.label == 'GDP_bln_rub'
def get_dataframes(csvfile, spec=SPEC):
    """Turn *csvfile* into dataframes according to *spec*.

    Args:
        csvfile: file connection or StringIO with the CSV data to parse.
        spec: parsing instructions (spec.Specification); defaults to SPEC.

    Returns:
        Tuple ``(dfa, dfq, dfm)``: dataframes at annual, quarterly and
        monthly frequencies.
    """
    tables = []
    for csv_segment, pdef in Reader(csvfile, spec).items():
        # Each segment may contribute several tables.
        tables += extract_tables(csv_segment, pdef)

    emitter = Emitter(tables)
    dfa, dfq, dfm = (
        emitter.get_dataframe(freq=code) for code in ('a', 'q', 'm')
    )
    return dfa, dfq, dfm
from csv2df.validator import Validator

# Input data.
# NOTE(review): this fixture is reproduced exactly as it appears in the
# file; line breaks and column separators inside the literal may have
# been lost - confirm against the intended CSV layout.
csvfile1 = io.StringIO(
    """Объем ВВП, млрд.рублей / Gross domestic product, bln rubles 1999 4823 901 1102 1373 1447 2000 7306 1527 1697 2038 2044""")

# Input instruction: a single definition that reads GDP in bln_rub.
main = Definition(units={"млрд.рублей": "bln_rub"})
main.append(varname="GDP",
            text="Объем ВВП",
            required_units=["bln_rub"])
spec1 = Specification(default=main)

# Reference parsing result, assembled step by step without get_dataframes().
parsed_tables = []
for csv_segment, pdef in Reader(csvfile1, spec1).items():
    tables = extract_tables(csv_segment, pdef)
    parsed_tables += tables
emitter = Emitter(parsed_tables)
dfa, dfq, dfm = [
    emitter.get_dataframe(freq=code) for code in ('a', 'q', 'm')
]


def test_get_dataframes():
    """get_dataframes() must reproduce the step-by-step reference result."""
    # csvfile1 was already consumed once by the module-level parsing above,
    # so the buffer position is not at zero - rewind before reading again.
    if csvfile1.tell():
        csvfile1.seek(0)
    annual, quarterly, monthly = get_dataframes(csvfile1, spec1)
    assert annual.equals(dfa)
    assert quarterly.equals(dfq)
    assert monthly.equals(dfm)