Example #1
0
def get_dataframes(csvfile, spec):
    """Build annual, quarterly and monthly dataframes from *csvfile*.

    Args:
      csvfile (file connection or StringIO) - CSV file for parsing
      spec (spec.Specification) - parsing instructions

    Returns:
      Tuple of three Emitter.get_dataframe() results, requested with
      freq='a', freq='q' and freq='m', in that order.
    """
    # Reader.items() yields (csv_segment, pdef) pairs:
    #    csv_segment - list of reader.Row instances
    #    pdef - parsing definition (specification.Definition instance)
    # extract_tables() turns every pair into tables; all of them are
    # flattened into a single list, preserving segment order.
    parsed_tables = [
        table
        for csv_segment, pdef in Reader(csvfile, spec).items()
        for table in extract_tables(csv_segment, pdef)
    ]

    # One emitter serves all three frequencies.
    emitter = Emitter(parsed_tables)
    annual = emitter.get_dataframe(freq='a')
    quarterly = emitter.get_dataframe(freq='q')
    monthly = emitter.get_dataframe(freq='m')
    return annual, quarterly, monthly
Example #2
0
def get_dataframes(csvfile, spec=SPEC):
    """Parse *csvfile* with *spec* and return dataframes for three frequencies.

    Args:
      csvfile - CSV source handed to Reader
      spec - parsing instructions handed to Reader, defaults to SPEC

    Returns:
      Tuple of Emitter.get_dataframe() results for freq='a', 'q' and 'm'.
    """
    # Gather tables from every (csv_segment, pdef) pair the reader yields.
    collected = []
    for csv_segment, pdef in Reader(csvfile, spec).items():
        collected.extend(extract_tables(csv_segment, pdef))

    emitter = Emitter(collected)
    annual = emitter.get_dataframe(freq='a')
    quarterly = emitter.get_dataframe(freq='q')
    monthly = emitter.get_dataframe(freq='m')
    return annual, quarterly, monthly
Example #3
0
def get_dataframes(csvfile, spec=SPEC):
    """Return dataframes extracted from *csvfile* according to *spec*.

    Args:
      csvfile - CSV source handed to Reader
      spec - parsing instructions handed to Reader, defaults to SPEC

    Returns:
      Tuple of Emitter.get_dataframe() results for freq='a', 'q' and 'm'.
    """
    tables = []
    for csv_segment, pdef in Reader(csvfile, spec).items():
        # Each segment may produce several tables; keep them all, in order.
        for table in extract_tables(csv_segment, pdef):
            tables.append(table)

    emitter = Emitter(tables)
    by_year = emitter.get_dataframe(freq='a')
    by_quarter = emitter.get_dataframe(freq='q')
    by_month = emitter.get_dataframe(freq='m')
    return by_year, by_quarter, by_month
Example #4
0
def get_dataframes(path, spec=PARSING_DEFINITION):
    """Extract dataframes from the source at *path* using *spec*.

    Args:
       path - CSV source, passed as-is to get_segment_with_pdef()
       spec - parsing instructions, a mapping with 'default' and
              'segments' keys; defaults to PARSING_DEFINITION

    Returns:
       Dictionary with one entry per item of FREQUENCIES, mapping the
       frequency to the result of Emitter.get_dataframe() for it.
    """
    jobs = get_segment_with_pdef(path, spec['default'], spec['segments'])

    # Flatten tables of all (csv_segment, pdef) jobs into one list.
    tables = []
    for csv_segment, pdef in jobs:
        tables.extend(extract_tables(csv_segment, pdef))

    emitter = Emitter(tables)
    dataframes = {}
    for freq in FREQUENCIES:
        dataframes[freq] = emitter.get_dataframe(freq)
    return dataframes
Example #5
0
class Test_extract_tables_function:
    """Checks on the result of extract_tables() for the sample input."""

    # Evaluated once, when the class body runs; shared by all tests below.
    tables = extract_tables(csv_segment=mock_rows(), pdef=Sample.pdef())

    # FIXME:  more functions in extract_tables other than split tables

    def test_returns_list(self):
        result = self.tables
        assert isinstance(result, list)

    def test_table0_is_table_instance(self):
        first = self.tables[0]
        assert isinstance(first, Table)
        assert first == Sample.table(0)

    def test_table0_can_be_parsed_with_label_GDP_bln_rub(self):
        # NOTE: set_label() is called on the shared class-level table.
        first = self.tables[0]
        varnames = {'Объем ВВП': 'GDP'}
        units = {'млрд.рублей': 'bln_rub'}
        first.set_label(varnames_dict=varnames, units_dict=units)
        assert first.label == 'GDP_bln_rub'
Example #6
0
def get_dataframes(csvfile, spec=SPEC):
    """Parse *csvfile* following *spec* and emit dataframes.

    Args:
       csvfile (file connection or StringIO) - CSV file for parsing
       spec (spec.Specification) - parsing instructions, defaults to SPEC

    Returns:
       Tuple of three dataframes: annual, quarterly and monthly, as
       returned by Emitter.get_dataframe() for freq 'a', 'q' and 'm'.
    """
    segments = Reader(csvfile, spec).items()

    # Accumulate the tables of every (csv_segment, pdef) pair, in order.
    tables = []
    for csv_segment, pdef in segments:
        tables += extract_tables(csv_segment, pdef)

    emitter = Emitter(tables)
    annual = emitter.get_dataframe(freq='a')
    quarterly = emitter.get_dataframe(freq='q')
    monthly = emitter.get_dataframe(freq='m')
    return annual, quarterly, monthly
from csv2df.validator import Validator

# Module-level fixture: sample input, its parsing specification and the
# expected dataframes, built step by step without get_dataframes().

# Input data: an in-memory CSV with a header line and two data rows
# (1999 and 2000).
csvfile1 = io.StringIO(
    """Объем ВВП, млрд.рублей / Gross domestic product, bln rubles
1999	4823	901	1102	1373	1447
2000	7306	1527	1697	2038	2044""")
# Input instruction: map the unit text to 'bln_rub' and register the
# 'GDP' variable under its header text, then wrap it in a Specification.
main = Definition(units={"млрд.рублей": "bln_rub"})
main.append(varname="GDP", text="Объем ВВП", required_units=["bln_rub"])
spec1 = Specification(default=main)

# Parsing result: collect tables from every (csv_segment, pdef) pair.
# NOTE: this reads csvfile1, so its buffer position is no longer at zero
# afterwards; later users of csvfile1 have to rewind it.
parsed_tables = []
for csv_segment, pdef in Reader(csvfile1, spec1).items():
    tables = extract_tables(csv_segment, pdef)
    parsed_tables.extend(tables)
# Reference dataframes, one per frequency code ('a', 'q', 'm').
emitter = Emitter(parsed_tables)
dfa = emitter.get_dataframe(freq='a')
dfq = emitter.get_dataframe(freq='q')
dfm = emitter.get_dataframe(freq='m')


def test_get_dataframes():
    """get_dataframes() must reproduce the module-level dfa, dfq and dfm."""
    # csvfile1 was already consumed once at module level, so its buffer
    # position may not be at zero; rewind before parsing it again.
    if csvfile1.tell():
        csvfile1.seek(0)
    annual, quarterly, monthly = get_dataframes(csvfile1, spec1)
    assert annual.equals(dfa)
    assert quarterly.equals(dfq)
    assert monthly.equals(dfm)